kmp_runtime.cpp source code [openmp/runtime/src/kmp_runtime.cpp]

1	/*
2	* kmp_runtime.cpp -- KPTS runtime support library
3	*/
4
5	//===----------------------------------------------------------------------===//
6	//
7	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8	// See https://llvm.org/LICENSE.txt for license information.
9	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "kmp.h"
14	#include "kmp_affinity.h"
15	#include "kmp_atomic.h"
16	#include "kmp_environment.h"
17	#include "kmp_error.h"
18	#include "kmp_i18n.h"
19	#include "kmp_io.h"
20	#include "kmp_itt.h"
21	#include "kmp_settings.h"
22	#include "kmp_stats.h"
23	#include "kmp_str.h"
24	#include "kmp_wait_release.h"
25	#include "kmp_wrapper_getpid.h"
26	#include "kmp_dispatch.h"
27	#include "kmp_utils.h"
28	#if KMP_USE_HIER_SCHED
29	#include "kmp_dispatch_hier.h"
30	#endif
31
32	#if OMPT_SUPPORT
33	#include "ompt-specific.h"
34	#endif
35	#if OMPD_SUPPORT
36	#include "ompd-specific.h"
37	#endif
38
39	#if OMP_PROFILING_SUPPORT
40	#include "llvm/Support/TimeProfiler.h"
41	static char ProfileTraceFile = nullptr*;
42	#endif
43
44	/ these are temporary issues to be dealt with /
45	#define KMP_USE_PRCTL 0
46
47	#if KMP_OS_WINDOWS
48	#include <process.h>
49	#endif
50
51	#ifndef KMP_USE_SHM
52	// Windows and WASI do not need these include files as they don't use shared
53	// memory.
54	#else
55	#include <sys/mman.h>
56	#include <sys/stat.h>
57	#include <fcntl.h>
58	#define SHM_SIZE 1024
59	#endif
60
61	#if defined(KMP_GOMP_COMPAT)
62	char const __kmp_version_alt_comp[] =
63	KMP_VERSION_PREFIX "alternative compiler support: yes";
64	#endif /* defined(KMP_GOMP_COMPAT) */
65
66	char const __kmp_version_omp_api[] =
67	KMP_VERSION_PREFIX "API version: 5.0 (201611)";
68
69	#ifdef KMP_DEBUG
70	char const __kmp_version_lock[] =
71	KMP_VERSION_PREFIX "lock type: run time selectable";
72	#endif /* KMP_DEBUG */
73
74	#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
75
76	/ ------------------------------------------------------------------------ /
77
78	#if KMP_USE_MONITOR
79	kmp_info_t __kmp_monitor;
80	#endif
81
82	/ Forward declarations /
83
84	void __kmp_cleanup(void);
85
86	static void __kmp_initialize_info(kmp_info_t , kmp_team_t , int tid,
87	int gtid);
88	static void __kmp_initialize_team(kmp_team_t team, int* new_nproc,
89	kmp_internal_control_t *new_icvs,
90	ident_t *loc);
91	#if KMP_AFFINITY_SUPPORTED
92	static void __kmp_partition_places(kmp_team_t *team,
93	int update_master_only = `0`);
94	#endif
95	static void __kmp_do_serial_initialize(void);
96	void __kmp_fork_barrier(int gtid, int tid);
97	void __kmp_join_barrier(int gtid);
98	void __kmp_setup_icv_copy(kmp_team_t team, int* new_nproc,
99	kmp_internal_control_t new_icvs, ident_t loc);
100
101	#ifdef USE_LOAD_BALANCE
102	static int __kmp_load_balance_nproc(kmp_root_t root, int* set_nproc);
103	#endif
104
105	static int __kmp_expand_threads(int nNeed);
106	#if KMP_OS_WINDOWS
107	static int __kmp_unregister_root_other_thread(int gtid);
108	#endif
109	static void __kmp_reap_thread(kmp_info_t thread, int* is_root);
110	kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
111
112	void __kmp_resize_dist_barrier(kmp_team_t team, int* old_nthreads,
113	int new_nthreads);
114	void __kmp_add_threads_to_team(kmp_team_t team, int* new_nthreads);
115
116	static kmp_nested_nthreads_t __kmp_override_nested_nth(kmp_info_t thr,
117	int level) {
118	kmp_nested_nthreads_t *new_nested_nth =
119	(kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
120	sizeof(kmp_nested_nthreads_t));
121	int new_size = level + thr->th.th_set_nested_nth_sz;
122	new_nested_nth->nth = (int )KMP_INTERNAL_MALLOC(new_size sizeof(int));
123	for (int i = `0`; i < level + `1`; ++i)
124	new_nested_nth->nth[i] = `0`;
125	for (int i = level + `1`, j = `1`; i < new_size; ++i, ++j)
126	new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
127	new_nested_nth->size = new_nested_nth->used = new_size;
128	return new_nested_nth;
129	}
130
131	/ Calculate the identifier of the current thread /
132	/ fast (and somewhat portable) way to get unique identifier of executing*
133	thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. /*
134	int __kmp_get_global_thread_id() {
135	int i;
136	kmp_info_t **other_threads;
137	size_t stack_data;
138	char *stack_addr;
139	size_t stack_size;
140	char *stack_base;
141
142	KA_TRACE(
143	`1000`,
144	("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
145	__kmp_nth, __kmp_all_nth));
146
147	/ JPH - to handle the case where __kmpc_end(0) is called immediately prior to*
148	a parallel region, made it return KMP_GTID_DNE to force serial_initialize
149	by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
150	__kmp_init_gtid for this to work. /*
151
152	if (!TCR_4(__kmp_init_gtid))
153	return KMP_GTID_DNE;
154
155	#ifdef KMP_TDATA_GTID
156	if (TCR_4(__kmp_gtid_mode) >= `3`) {
157	KA_TRACE(`1000`, ("*** __kmp_get_global_thread_id: using TDATA\n"));
158	return __kmp_gtid;
159	}
160	#endif
161	if (TCR_4(__kmp_gtid_mode) >= `2`) {
162	KA_TRACE(`1000`, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
163	return __kmp_gtid_get_specific();
164	}
165	KA_TRACE(`1000`, ("*** __kmp_get_global_thread_id: using internal alg.\n"));
166
167	stack_addr = (char *)&stack_data;
168	other_threads = __kmp_threads;
169
170	/ ATT: The code below is a source of potential bugs due to unsynchronized*
171	access to __kmp_threads array. For example:
172	1. Current thread loads other_threads[i] to thr and checks it, it is
173	non-NULL.
174	2. Current thread is suspended by OS.
175	3. Another thread unregisters and finishes (debug versions of free()
176	may fill memory with something like 0xEF).
177	4. Current thread is resumed.
178	5. Current thread reads junk from thr.*
179	TODO: Fix it. --ln /*
180
181	for (i = `0`; i < __kmp_threads_capacity; i++) {
182
183	kmp_info_t thr = (kmp_info_t )TCR_SYNC_PTR(other_threads[i]);
184	if (!thr)
185	continue;
186
187	stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
188	stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
189
190	/ stack grows down -- search through all of the active threads /
191
192	if (stack_addr <= stack_base) {
193	size_t stack_diff = stack_base - stack_addr;
194
195	if (stack_diff <= stack_size) {
196	/ The only way we can be closer than the allocated /
197	/ stack size is if we are running on this thread. /
198	// __kmp_gtid_get_specific can return negative value because this
199	// function can be called by thread destructor. However, before the
200	// thread destructor is called, the value of the corresponding
201	// thread-specific data will be reset to NULL.
202	KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < `0` \|\|
203	__kmp_gtid_get_specific() == i);
204	return i;
205	}
206	}
207	}
208
209	/ get specific to try and determine our gtid /
210	KA_TRACE(`1000`,
211	("*** __kmp_get_global_thread_id: internal alg. failed to find "
212	"thread, using TLS\n"));
213	i = __kmp_gtid_get_specific();
214
215	/fprintf( stderr, "=== %d\n", i ); / / GROO /
216
217	/ if we havn't been assigned a gtid, then return code /
218	if (i < `0`)
219	return i;
220
221	// other_threads[i] can be nullptr at this point because the corresponding
222	// thread could have already been destructed. It can happen when this function
223	// is called in end library routine.
224	if (!TCR_SYNC_PTR(other_threads[i]))
225	return i;
226
227	/ dynamically updated stack window for uber threads to avoid get_specific*
228	call /*
229	if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
230	KMP_FATAL(StackOverflow, i);
231	}
232
233	stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
234	if (stack_addr > stack_base) {
235	TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
236	TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
237	other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
238	stack_base);
239	} else {
240	TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
241	stack_base - stack_addr);
242	}
243
244	/ Reprint stack bounds for ubermaster since they have been refined /
245	if (__kmp_storage_map) {
246	char stack_end = (char* *)other_threads[i]->th.th_info.ds.ds_stackbase;
247	char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
248	__kmp_print_storage_map_gtid(gtid: i, p1: stack_beg, p2: stack_end,
249	size: other_threads[i]->th.th_info.ds.ds_stacksize,
250	format: "th_%d stack (refinement)", i);
251	}
252	return i;
253	}
254
255	int __kmp_get_global_thread_id_reg() {
256	int gtid;
257
258	if (!__kmp_init_serial) {
259	gtid = KMP_GTID_DNE;
260	} else
261	#ifdef KMP_TDATA_GTID
262	if (TCR_4(__kmp_gtid_mode) >= `3`) {
263	KA_TRACE(`1000`, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
264	gtid = __kmp_gtid;
265	} else
266	#endif
267	if (TCR_4(__kmp_gtid_mode) >= `2`) {
268	KA_TRACE(`1000`, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
269	gtid = __kmp_gtid_get_specific();
270	} else {
271	KA_TRACE(`1000`,
272	("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
273	gtid = __kmp_get_global_thread_id();
274	}
275
276	/ we must be a new uber master sibling thread /
277	if (gtid == KMP_GTID_DNE) {
278	KA_TRACE(`10`,
279	("__kmp_get_global_thread_id_reg: Encountered new root thread. "
280	"Registering a new gtid.\n"));
281	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
282	if (!__kmp_init_serial) {
283	__kmp_do_serial_initialize();
284	gtid = __kmp_gtid_get_specific();
285	} else {
286	gtid = __kmp_register_root(FALSE);
287	}
288	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
289	/__kmp_printf( "+++ %d\n", gtid ); / / GROO /
290	}
291
292	KMP_DEBUG_ASSERT(gtid >= `0`);
293
294	return gtid;
295	}
296
297	/ caller must hold forkjoin_lock /
298	void __kmp_check_stack_overlap(kmp_info_t *th) {
299	int f;
300	char *stack_beg = NULL;
301	char *stack_end = NULL;
302	int gtid;
303
304	KA_TRACE(`10`, ("__kmp_check_stack_overlap: called\n"));
305	if (__kmp_storage_map) {
306	stack_end = (char *)th->th.th_info.ds.ds_stackbase;
307	stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
308
309	gtid = __kmp_gtid_from_thread(thr: th);
310
311	if (gtid == KMP_GTID_MONITOR) {
312	__kmp_print_storage_map_gtid(
313	gtid, p1: stack_beg, p2: stack_end, size: th->th.th_info.ds.ds_stacksize,
314	format: "th_%s stack (%s)", "mon",
315	(th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
316	} else {
317	__kmp_print_storage_map_gtid(
318	gtid, p1: stack_beg, p2: stack_end, size: th->th.th_info.ds.ds_stacksize,
319	format: "th_%d stack (%s)", gtid,
320	(th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
321	}
322	}
323
324	/ No point in checking ubermaster threads since they use refinement and*
325	* cannot overlap */
326	gtid = __kmp_gtid_from_thread(thr: th);
327	if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
328	KA_TRACE(`10`,
329	("__kmp_check_stack_overlap: performing extensive checking\n"));
330	if (stack_beg == NULL) {
331	stack_end = (char *)th->th.th_info.ds.ds_stackbase;
332	stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
333	}
334
335	for (f = `0`; f < __kmp_threads_capacity; f++) {
336	kmp_info_t f_th = (kmp_info_t )TCR_SYNC_PTR(__kmp_threads[f]);
337
338	if (f_th && f_th != th) {
339	char *other_stack_end =
340	(char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
341	char *other_stack_beg =
342	other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
343	if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) \|\|
344	(stack_end > other_stack_beg && stack_end < other_stack_end)) {
345
346	/ Print the other stack values before the abort /
347	if (__kmp_storage_map)
348	__kmp_print_storage_map_gtid(
349	gtid: -`1`, p1: other_stack_beg, p2: other_stack_end,
350	size: (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
351	format: "th_%d stack (overlapped)", __kmp_gtid_from_thread(thr: f_th));
352
353	__kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
354	__kmp_msg_null);
355	}
356	}
357	}
358	}
359	KA_TRACE(`10`, ("__kmp_check_stack_overlap: returning\n"));
360	}
361
362	/ ------------------------------------------------------------------------ /
363
364	void __kmp_infinite_loop(void) {
365	static int done = FALSE;
366
367	while (!done) {
368	KMP_YIELD(TRUE);
369	}
370	}
371
372	#define MAX_MESSAGE 512
373
374	void __kmp_print_storage_map_gtid(int gtid, void p1, void* *p2, size_t size,
375	char const *format, ...) {
376	char buffer[MAX_MESSAGE];
377	va_list ap;
378
379	va_start(ap, format);
380	KMP_SNPRINTF(s: buffer, maxlen: sizeof(buffer), format: "OMP storage map: %p %p%8lu %s\n", p1,
381	p2, (unsigned long)size, format);
382	__kmp_acquire_bootstrap_lock(lck: &__kmp_stdio_lock);
383	__kmp_vprintf(stream: kmp_err, format: buffer, ap);
384	#if KMP_PRINT_DATA_PLACEMENT
385	int node;
386	if (gtid >= `0`) {
387	if (p1 <= p2 && (char )p2 - (char* *)p1 == size) {
388	if (__kmp_storage_map_verbose) {
389	node = __kmp_get_host_node(p1);
390	if (node < `0`) / doesn't work, so don't try this next time /
391	__kmp_storage_map_verbose = FALSE;
392	else {
393	char *last;
394	int lastNode;
395	int localProc = __kmp_get_cpu_from_gtid(gtid);
396
397	const int page_size = KMP_GET_PAGE_SIZE();
398
399	p1 = (void *)((size_t)p1 & ~((size_t)page_size - `1`));
400	p2 = (void *)(((size_t)p2 - `1`) & ~((size_t)page_size - `1`));
401	if (localProc >= `0`)
402	__kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
403	localProc >> `1`);
404	else
405	__kmp_printf_no_lock(" GTID %d\n", gtid);
406	#if KMP_USE_PRCTL
407	/ The more elaborate format is disabled for now because of the prctl*
408	* hanging bug. */
409	do {
410	last = p1;
411	lastNode = node;
412	/ This loop collates adjacent pages with the same host node. /
413	do {
414	(char *)p1 += page_size;
415	} while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
416	__kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - `1`,
417	lastNode);
418	} while (p1 <= p2);
419	#else
420	__kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
421	(char *)p1 + (page_size - `1`),
422	__kmp_get_host_node(p1));
423	if (p1 < p2) {
424	__kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
425	(char *)p2 + (page_size - `1`),
426	__kmp_get_host_node(p2));
427	}
428	#endif
429	}
430	}
431	} else
432	__kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
433	}
434	#endif /* KMP_PRINT_DATA_PLACEMENT */
435	__kmp_release_bootstrap_lock(lck: &__kmp_stdio_lock);
436
437	va_end(ap);
438	}
439
440	void __kmp_warn(char const *format, ...) {
441	char buffer[MAX_MESSAGE];
442	va_list ap;
443
444	if (__kmp_generate_warnings == kmp_warnings_off) {
445	return;
446	}
447
448	va_start(ap, format);
449
450	KMP_SNPRINTF(s: buffer, maxlen: sizeof(buffer), format: "OMP warning: %s\n", format);
451	__kmp_acquire_bootstrap_lock(lck: &__kmp_stdio_lock);
452	__kmp_vprintf(stream: kmp_err, format: buffer, ap);
453	__kmp_release_bootstrap_lock(lck: &__kmp_stdio_lock);
454
455	va_end(ap);
456	}
457
458	void __kmp_abort_process() {
459	// Later threads may stall here, but that's ok because abort() will kill them.
460	__kmp_acquire_bootstrap_lock(lck: &__kmp_exit_lock);
461
462	if (__kmp_debug_buf) {
463	__kmp_dump_debug_buffer();
464	}
465
466	#if KMP_OS_WINDOWS
467	// Let other threads know of abnormal termination and prevent deadlock
468	// if abort happened during library initialization or shutdown
469	__kmp_global.g.g_abort = SIGABRT;
470
471	/ On Windows* OS by default abort() causes pop-up error box, which stalls*
472	nightly testing. Unfortunately, we cannot reliably suppress pop-up error
473	boxes. _set_abort_behavior() works well, but this function is not
474	available in VS7 (this is not problem for DLL, but it is a problem for
475	static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
476	help, at least in some versions of MS C RTL.
477
478	It seems following sequence is the only way to simulate abort() and
479	avoid pop-up error box. /*
480	raise(SIGABRT);
481	_exit(`3`); // Just in case, if signal ignored, exit anyway.
482	#else
483	__kmp_unregister_library();
484	abort();
485	#endif
486
487	__kmp_infinite_loop();
488	__kmp_release_bootstrap_lock(lck: &__kmp_exit_lock);
489
490	} // __kmp_abort_process
491
492	void __kmp_abort_thread(void) {
493	// TODO: Eliminate g_abort global variable and this function.
494	// In case of abort just call abort(), it will kill all the threads.
495	__kmp_infinite_loop();
496	} // __kmp_abort_thread
497
498	/ Print out the storage map for the major kmp_info_t thread data structures*
499	that are allocated together. /*
500
501	static void __kmp_print_thread_storage_map(kmp_info_t thr, int* gtid) {
502	__kmp_print_storage_map_gtid(gtid, p1: thr, p2: thr + `1`, size: sizeof(kmp_info_t), format: "th_%d",
503	gtid);
504
505	__kmp_print_storage_map_gtid(gtid, p1: &thr->th.th_info, p2: &thr->th.th_team,
506	size: sizeof(kmp_desc_t), format: "th_%d.th_info", gtid);
507
508	__kmp_print_storage_map_gtid(gtid, p1: &thr->th.th_local, p2: &thr->th.th_pri_head,
509	size: sizeof(kmp_local_t), format: "th_%d.th_local", gtid);
510
511	__kmp_print_storage_map_gtid(
512	gtid, p1: &thr->th.th_bar[`0`], p2: &thr->th.th_bar[bs_last_barrier],
513	size: sizeof(kmp_balign_t) * bs_last_barrier, format: "th_%d.th_bar", gtid);
514
515	__kmp_print_storage_map_gtid(gtid, p1: &thr->th.th_bar[bs_plain_barrier],
516	p2: &thr->th.th_bar[bs_plain_barrier + `1`],
517	size: sizeof(kmp_balign_t), format: "th_%d.th_bar[plain]",
518	gtid);
519
520	__kmp_print_storage_map_gtid(gtid, p1: &thr->th.th_bar[bs_forkjoin_barrier],
521	p2: &thr->th.th_bar[bs_forkjoin_barrier + `1`],
522	size: sizeof(kmp_balign_t), format: "th_%d.th_bar[forkjoin]",
523	gtid);
524
525	#if KMP_FAST_REDUCTION_BARRIER
526	__kmp_print_storage_map_gtid(gtid, p1: &thr->th.th_bar[bs_reduction_barrier],
527	p2: &thr->th.th_bar[bs_reduction_barrier + `1`],
528	size: sizeof(kmp_balign_t), format: "th_%d.th_bar[reduction]",
529	gtid);
530	#endif // KMP_FAST_REDUCTION_BARRIER
531	}
532
533	/ Print out the storage map for the major kmp_team_t team data structures*
534	that are allocated together. /*
535
536	static void __kmp_print_team_storage_map(const char header, kmp_team_t team,
537	int team_id, int num_thr) {
538	int num_disp_buff = team->t.t_max_nproc > `1` ? __kmp_dispatch_num_buffers : `2`;
539	__kmp_print_storage_map_gtid(gtid: -`1`, p1: team, p2: team + `1`, size: sizeof(kmp_team_t), format: "%s_%d",
540	header, team_id);
541
542	__kmp_print_storage_map_gtid(gtid: -`1`, p1: &team->t.t_bar[`0`],
543	p2: &team->t.t_bar[bs_last_barrier],
544	size: sizeof(kmp_balign_team_t) * bs_last_barrier,
545	format: "%s_%d.t_bar", header, team_id);
546
547	__kmp_print_storage_map_gtid(gtid: -`1`, p1: &team->t.t_bar[bs_plain_barrier],
548	p2: &team->t.t_bar[bs_plain_barrier + `1`],
549	size: sizeof(kmp_balign_team_t), format: "%s_%d.t_bar[plain]",
550	header, team_id);
551
552	__kmp_print_storage_map_gtid(gtid: -`1`, p1: &team->t.t_bar[bs_forkjoin_barrier],
553	p2: &team->t.t_bar[bs_forkjoin_barrier + `1`],
554	size: sizeof(kmp_balign_team_t),
555	format: "%s_%d.t_bar[forkjoin]", header, team_id);
556
557	#if KMP_FAST_REDUCTION_BARRIER
558	__kmp_print_storage_map_gtid(gtid: -`1`, p1: &team->t.t_bar[bs_reduction_barrier],
559	p2: &team->t.t_bar[bs_reduction_barrier + `1`],
560	size: sizeof(kmp_balign_team_t),
561	format: "%s_%d.t_bar[reduction]", header, team_id);
562	#endif // KMP_FAST_REDUCTION_BARRIER
563
564	__kmp_print_storage_map_gtid(
565	gtid: -`1`, p1: &team->t.t_dispatch[`0`], p2: &team->t.t_dispatch[num_thr],
566	size: sizeof(kmp_disp_t) * num_thr, format: "%s_%d.t_dispatch", header, team_id);
567
568	__kmp_print_storage_map_gtid(
569	gtid: -`1`, p1: &team->t.t_threads[`0`], p2: &team->t.t_threads[num_thr],
570	size: sizeof(kmp_info_t ) num_thr, format: "%s_%d.t_threads", header, team_id);
571
572	__kmp_print_storage_map_gtid(gtid: -`1`, p1: &team->t.t_disp_buffer[`0`],
573	p2: &team->t.t_disp_buffer[num_disp_buff],
574	size: sizeof(dispatch_shared_info_t) * num_disp_buff,
575	format: "%s_%d.t_disp_buffer", header, team_id);
576	}
577
578	static void __kmp_init_allocator() {
579	__kmp_init_memkind();
580	__kmp_init_target_mem();
581	}
582	static void __kmp_fini_allocator() {
583	__kmp_fini_target_mem();
584	__kmp_fini_memkind();
585	}
586
587	/ ------------------------------------------------------------------------ /
588
589	#if ENABLE_LIBOMPTARGET
590	static void __kmp_init_omptarget() {
591	__kmp_init_target_task();
592	}
593	#endif
594
595	/ ------------------------------------------------------------------------ /
596
597	#if KMP_DYNAMIC_LIB
598	#if KMP_OS_WINDOWS
599
600	BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
601	//__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
602
603	switch (fdwReason) {
604
605	case DLL_PROCESS_ATTACH:
606	KA_TRACE(`10`, ("DllMain: PROCESS_ATTACH\n"));
607
608	return TRUE;
609
610	case DLL_PROCESS_DETACH:
611	KA_TRACE(`10`, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
612
613	// According to Windows documentation for DllMain entry point:*
614	// for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
615	// lpReserved == NULL when FreeLibrary() is called,
616	// lpReserved != NULL when the process is terminated.
617	// When FreeLibrary() is called, worker threads remain alive. So the
618	// runtime's state is consistent and executing proper shutdown is OK.
619	// When the process is terminated, worker threads have exited or been
620	// forcefully terminated by the OS and only the shutdown thread remains.
621	// This can leave the runtime in an inconsistent state.
622	// Hence, only attempt proper cleanup when FreeLibrary() is called.
623	// Otherwise, rely on OS to reclaim resources.
624	if (lpReserved == NULL)
625	__kmp_internal_end_library(__kmp_gtid_get_specific());
626
627	return TRUE;
628
629	case DLL_THREAD_ATTACH:
630	KA_TRACE(`10`, ("DllMain: THREAD_ATTACH\n"));
631
632	/ if we want to register new siblings all the time here call*
633	* __kmp_get_gtid(); */
634	return TRUE;
635
636	case DLL_THREAD_DETACH:
637	KA_TRACE(`10`, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
638
639	__kmp_internal_end_thread(__kmp_gtid_get_specific());
640	return TRUE;
641	}
642
643	return TRUE;
644	}
645
646	#endif /* KMP_OS_WINDOWS */
647	#endif /* KMP_DYNAMIC_LIB */
648
649	/ __kmp_parallel_deo -- Wait until it's our turn. /
650	void __kmp_parallel_deo(int gtid_ref, int* cid_ref, ident_t loc_ref) {
651	int gtid = *gtid_ref;
652	#ifdef BUILD_PARALLEL_ORDERED
653	kmp_team_t *team = __kmp_team_from_gtid(gtid);
654	#endif /* BUILD_PARALLEL_ORDERED */
655
656	if (__kmp_env_consistency_check) {
657	if (__kmp_threads[gtid]->th.th_root->r.r_active)
658	#if KMP_USE_DYNAMIC_LOCK
659	__kmp_push_sync(gtid, ct: ct_ordered_in_parallel, ident: loc_ref, NULL, `0`);
660	#else
661	__kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
662	#endif
663	}
664	#ifdef BUILD_PARALLEL_ORDERED
665	if (!team->t.t_serialized) {
666	KMP_MB();
667	KMP_WAIT(spinner: &team->t.t_ordered.dt.t_value, checker: __kmp_tid_from_gtid(gtid), KMP_EQ,
668	NULL);
669	KMP_MB();
670	}
671	#endif /* BUILD_PARALLEL_ORDERED */
672	}
673
674	/ __kmp_parallel_dxo -- Signal the next task. /
675	void __kmp_parallel_dxo(int gtid_ref, int* cid_ref, ident_t loc_ref) {
676	int gtid = *gtid_ref;
677	#ifdef BUILD_PARALLEL_ORDERED
678	int tid = __kmp_tid_from_gtid(gtid);
679	kmp_team_t *team = __kmp_team_from_gtid(gtid);
680	#endif /* BUILD_PARALLEL_ORDERED */
681
682	if (__kmp_env_consistency_check) {
683	if (__kmp_threads[gtid]->th.th_root->r.r_active)
684	__kmp_pop_sync(gtid, ct: ct_ordered_in_parallel, ident: loc_ref);
685	}
686	#ifdef BUILD_PARALLEL_ORDERED
687	if (!team->t.t_serialized) {
688	KMP_MB(); / Flush all pending memory write invalidates. /
689
690	/ use the tid of the next thread in this team /
691	/ TODO replace with general release procedure /
692	team->t.t_ordered.dt.t_value = ((tid + `1`) % team->t.t_nproc);
693
694	KMP_MB(); / Flush all pending memory write invalidates. /
695	}
696	#endif /* BUILD_PARALLEL_ORDERED */
697	}
698
699	/ ------------------------------------------------------------------------ /
700	/ The BARRIER for a SINGLE process section is always explicit /
701
702	int __kmp_enter_single(int gtid, ident_t id_ref, int* push_ws) {
703	int status;
704	kmp_info_t *th;
705	kmp_team_t *team;
706
707	if (!TCR_4(__kmp_init_parallel))
708	__kmp_parallel_initialize();
709	__kmp_resume_if_soft_paused();
710
711	th = __kmp_threads[gtid];
712	team = th->th.th_team;
713	status = `0`;
714
715	th->th.th_ident = id_ref;
716
717	if (team->t.t_serialized) {
718	status = `1`;
719	} else {
720	kmp_int32 old_this = th->th.th_local.this_construct;
721
722	++th->th.th_local.this_construct;
723	/ try to set team count to thread count--success means thread got the*
724	single block /*
725	/ TODO: Should this be acquire or release? /
726	if (team->t.t_construct == old_this) {
727	status = __kmp_atomic_compare_store_acq(p: &team->t.t_construct, expected: old_this,
728	desired: th->th.th_local.this_construct);
729	}
730	#if USE_ITT_BUILD
731	if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == `3` &&
732	KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
733	team->t.t_active_level == `1`) {
734	// Only report metadata by primary thread of active team at level 1
735	__kmp_itt_metadata_single(loc: id_ref);
736	}
737	#endif /* USE_ITT_BUILD */
738	}
739
740	if (__kmp_env_consistency_check) {
741	if (status && push_ws) {
742	__kmp_push_workshare(gtid, ct: ct_psingle, ident: id_ref);
743	} else {
744	__kmp_check_workshare(gtid, ct: ct_psingle, ident: id_ref);
745	}
746	}
747	#if USE_ITT_BUILD
748	if (status) {
749	__kmp_itt_single_start(gtid);
750	}
751	#endif /* USE_ITT_BUILD */
752	return status;
753	}
754
755	void __kmp_exit_single(int gtid) {
756	#if USE_ITT_BUILD
757	__kmp_itt_single_end(gtid);
758	#endif /* USE_ITT_BUILD */
759	if (__kmp_env_consistency_check)
760	__kmp_pop_workshare(gtid, ct: ct_psingle, NULL);
761	}
762
763	/ determine if we can go parallel or must use a serialized parallel region and*
764	* how many threads we can use
765	* set_nproc is the number of threads requested for the team
766	* returns 0 if we should serialize or only use one thread,
767	* otherwise the number of threads to use
768	* The forkjoin lock is held by the caller. */
769	static int __kmp_reserve_threads(kmp_root_t root, kmp_team_t parent_team,
770	int master_tid, int set_nthreads,
771	int enter_teams) {
772	int capacity;
773	int new_nthreads;
774	KMP_DEBUG_ASSERT(__kmp_init_serial);
775	KMP_DEBUG_ASSERT(root && parent_team);
776	kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
777
778	// If dyn-var is set, dynamically adjust the number of desired threads,
779	// according to the method specified by dynamic_mode.
780	new_nthreads = set_nthreads;
781	if (!get__dynamic_2(parent_team, master_tid)) {
782	;
783	}
784	#ifdef USE_LOAD_BALANCE
785	else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
786	new_nthreads = __kmp_load_balance_nproc(root, set_nproc: set_nthreads);
787	if (new_nthreads == `1`) {
788	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d load balance reduced "
789	"reservation to 1 thread\n",
790	master_tid));
791	return `1`;
792	}
793	if (new_nthreads < set_nthreads) {
794	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d load balance reduced "
795	"reservation to %d threads\n",
796	master_tid, new_nthreads));
797	}
798	}
799	#endif /* USE_LOAD_BALANCE */
800	else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
801	new_nthreads = __kmp_avail_proc - __kmp_nth +
802	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc);
803	if (new_nthreads <= `1`) {
804	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d thread limit reduced "
805	"reservation to 1 thread\n",
806	master_tid));
807	return `1`;
808	}
809	if (new_nthreads < set_nthreads) {
810	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d thread limit reduced "
811	"reservation to %d threads\n",
812	master_tid, new_nthreads));
813	} else {
814	new_nthreads = set_nthreads;
815	}
816	} else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
817	if (set_nthreads > `2`) {
818	new_nthreads = __kmp_get_random(thread: parent_team->t.t_threads[master_tid]);
819	new_nthreads = (new_nthreads % set_nthreads) + `1`;
820	if (new_nthreads == `1`) {
821	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d dynamic random reduced "
822	"reservation to 1 thread\n",
823	master_tid));
824	return `1`;
825	}
826	if (new_nthreads < set_nthreads) {
827	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d dynamic random reduced "
828	"reservation to %d threads\n",
829	master_tid, new_nthreads));
830	}
831	}
832	} else {
833	KMP_ASSERT(`0`);
834	}
835
836	// Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
837	if (__kmp_nth + new_nthreads -
838	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc) >
839	__kmp_max_nth) {
840	int tl_nthreads = __kmp_max_nth - __kmp_nth +
841	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc);
842	if (tl_nthreads <= `0`) {
843	tl_nthreads = `1`;
844	}
845
846	// If dyn-var is false, emit a 1-time warning.
847	if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
848	__kmp_reserve_warn = `1`;
849	__kmp_msg(kmp_ms_warning,
850	KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
851	KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
852	}
853	if (tl_nthreads == `1`) {
854	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
855	"reduced reservation to 1 thread\n",
856	master_tid));
857	return `1`;
858	}
859	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
860	"reservation to %d threads\n",
861	master_tid, tl_nthreads));
862	new_nthreads = tl_nthreads;
863	}
864
865	// Respect OMP_THREAD_LIMIT
866	int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
867	int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
868	if (cg_nthreads + new_nthreads -
869	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc) >
870	max_cg_threads) {
871	int tl_nthreads = max_cg_threads - cg_nthreads +
872	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc);
873	if (tl_nthreads <= `0`) {
874	tl_nthreads = `1`;
875	}
876
877	// If dyn-var is false, emit a 1-time warning.
878	if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
879	__kmp_reserve_warn = `1`;
880	__kmp_msg(kmp_ms_warning,
881	KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
882	KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
883	}
884	if (tl_nthreads == `1`) {
885	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
886	"reduced reservation to 1 thread\n",
887	master_tid));
888	return `1`;
889	}
890	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
891	"reservation to %d threads\n",
892	master_tid, tl_nthreads));
893	new_nthreads = tl_nthreads;
894	}
895
896	// Check if the threads array is large enough, or needs expanding.
897	// See comment in __kmp_register_root() about the adjustment if
898	// __kmp_threads[0] == NULL.
899	capacity = __kmp_threads_capacity;
900	if (TCR_PTR(__kmp_threads[`0`]) == NULL) {
901	--capacity;
902	}
903	// If it is not for initializing the hidden helper team, we need to take
904	// __kmp_hidden_helper_threads_num out of the capacity because it is included
905	// in __kmp_threads_capacity.
906	if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
907	capacity -= __kmp_hidden_helper_threads_num;
908	}
909	if (__kmp_nth + new_nthreads -
910	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc) >
911	capacity) {
912	// Expand the threads array.
913	int slotsRequired = __kmp_nth + new_nthreads -
914	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc) -
915	capacity;
916	int slotsAdded = __kmp_expand_threads(nNeed: slotsRequired);
917	if (slotsAdded < slotsRequired) {
918	// The threads array was not expanded enough.
919	new_nthreads -= (slotsRequired - slotsAdded);
920	KMP_ASSERT(new_nthreads >= `1`);
921
922	// If dyn-var is false, emit a 1-time warning.
923	if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
924	__kmp_reserve_warn = `1`;
925	if (__kmp_tp_cached) {
926	__kmp_msg(kmp_ms_warning,
927	KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
928	KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
929	KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
930	} else {
931	__kmp_msg(kmp_ms_warning,
932	KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
933	KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
934	}
935	}
936	}
937	}
938
939	#ifdef KMP_DEBUG
940	if (new_nthreads == `1`) {
941	KC_TRACE(`10`,
942	("__kmp_reserve_threads: T#%d serializing team after reclaiming "
943	"dead roots and rechecking; requested %d threads\n",
944	__kmp_get_gtid(), set_nthreads));
945	} else {
946	KC_TRACE(`10`, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
947	" %d threads\n",
948	__kmp_get_gtid(), new_nthreads, set_nthreads));
949	}
950	#endif // KMP_DEBUG
951
952	if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
953	__kmpc_error(loc: this_thr->th.th_nt_loc, severity: this_thr->th.th_nt_sev,
954	message: this_thr->th.th_nt_msg);
955	}
956	return new_nthreads;
957	}
958
959	/ Allocate threads from the thread pool and assign them to the new team. We are*
960	assured that there are enough threads available, because we checked on that
961	earlier within critical section forkjoin /*
962	static void __kmp_fork_team_threads(kmp_root_t root, kmp_team_t team,
963	kmp_info_t master_th, int* master_gtid,
964	int fork_teams_workers) {
965	int i;
966	int use_hot_team;
967
968	KA_TRACE(`10`, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
969	KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
970	KMP_MB();
971
972	/ first, let's setup the primary thread /
973	master_th->th.th_info.ds.ds_tid = `0`;
974	master_th->th.th_team = team;
975	master_th->th.th_team_nproc = team->t.t_nproc;
976	master_th->th.th_team_master = master_th;
977	master_th->th.th_team_serialized = FALSE;
978	master_th->th.th_dispatch = &team->t.t_dispatch[`0`];
979
980	/ make sure we are not the optimized hot team /
981	#if KMP_NESTED_HOT_TEAMS
982	use_hot_team = `0`;
983	kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
984	if (hot_teams) { // hot teams array is not allocated if
985	// KMP_HOT_TEAMS_MAX_LEVEL=0
986	int level = team->t.t_active_level - `1`; // index in array of hot teams
987	if (master_th->th.th_teams_microtask) { // are we inside the teams?
988	if (master_th->th.th_teams_size.nteams > `1`) {
989	++level; // level was not increased in teams construct for
990	// team_of_masters
991	}
992	if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
993	master_th->th.th_teams_level == team->t.t_level) {
994	++level; // level was not increased in teams construct for
995	// team_of_workers before the parallel
996	} // team->t.t_level will be increased inside parallel
997	}
998	if (level < __kmp_hot_teams_max_level) {
999	if (hot_teams[level].hot_team) {
1000	// hot team has already been allocated for given level
1001	KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
1002	use_hot_team = `1`; // the team is ready to use
1003	} else {
1004	use_hot_team = `0`; // AC: threads are not allocated yet
1005	hot_teams[level].hot_team = team; // remember new hot team
1006	hot_teams[level].hot_team_nth = team->t.t_nproc;
1007	}
1008	} else {
1009	use_hot_team = `0`;
1010	}
1011	}
1012	#else
1013	use_hot_team = team == root->r.r_hot_team;
1014	#endif
1015	if (!use_hot_team) {
1016
1017	/ install the primary thread /
1018	team->t.t_threads[`0`] = master_th;
1019	__kmp_initialize_info(master_th, team, tid: `0`, gtid: master_gtid);
1020
1021	/ now, install the worker threads /
1022	for (i = `1`; i < team->t.t_nproc; i++) {
1023
1024	/ fork or reallocate a new thread and install it in team /
1025	kmp_info_t *thr = __kmp_allocate_thread(root, team, tid: i);
1026	team->t.t_threads[i] = thr;
1027	KMP_DEBUG_ASSERT(thr);
1028	KMP_DEBUG_ASSERT(thr->th.th_team == team);
1029	/ align team and thread arrived states /
1030	KA_TRACE(`20`, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
1031	"T#%d(%d:%d) join =%llu, plain=%llu\n",
1032	__kmp_gtid_from_tid(`0`, team), team->t.t_id, `0`,
1033	__kmp_gtid_from_tid(i, team), team->t.t_id, i,
1034	team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1035	team->t.t_bar[bs_plain_barrier].b_arrived));
1036	thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1037	thr->th.th_teams_level = master_th->th.th_teams_level;
1038	thr->th.th_teams_size = master_th->th.th_teams_size;
1039	{ // Initialize threads' barrier data.
1040	int b;
1041	kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1042	for (b = `0`; b < bs_last_barrier; ++b) {
1043	balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1044	KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1045	#if USE_DEBUGGER
1046	balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1047	#endif
1048	}
1049	}
1050	}
1051
1052	#if KMP_AFFINITY_SUPPORTED
1053	// Do not partition the places list for teams construct workers who
1054	// haven't actually been forked to do real work yet. This partitioning
1055	// will take place in the parallel region nested within the teams construct.
1056	if (!fork_teams_workers) {
1057	__kmp_partition_places(team);
1058	}
1059	#endif
1060
1061	if (team->t.t_nproc > `1` &&
1062	__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1063	team->t.b->update_num_threads(nthr: team->t.t_nproc);
1064	__kmp_add_threads_to_team(team, new_nthreads: team->t.t_nproc);
1065	}
1066	}
1067
1068	// Take care of primary thread's task state
1069	if (__kmp_tasking_mode != tskm_immediate_exec) {
1070	if (use_hot_team) {
1071	KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
1072	KA_TRACE(
1073	`20`,
1074	("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
1075	"%p, new task_team %p / team %p\n",
1076	__kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
1077	team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
1078	team));
1079
1080	// Store primary thread's current task state on new team
1081	KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1082	master_th->th.th_task_state);
1083
1084	// Restore primary thread's task state to hot team's state
1085	// by using thread 1's task state
1086	if (team->t.t_nproc > `1`) {
1087	KMP_DEBUG_ASSERT(team->t.t_threads[`1`]->th.th_task_state == `0` \|\|
1088	team->t.t_threads[`1`]->th.th_task_state == `1`);
1089	KMP_CHECK_UPDATE(master_th->th.th_task_state,
1090	team->t.t_threads[`1`]->th.th_task_state);
1091	} else {
1092	master_th->th.th_task_state = `0`;
1093	}
1094	} else {
1095	// Store primary thread's current task_state on new team
1096	KMP_CHECK_UPDATE(team->t.t_primary_task_state,
1097	master_th->th.th_task_state);
1098	// Are not using hot team, so set task state to 0.
1099	master_th->th.th_task_state = `0`;
1100	}
1101	}
1102
1103	if (__kmp_display_affinity && team->t.t_display_affinity != `1`) {
1104	for (i = `0`; i < team->t.t_nproc; i++) {
1105	kmp_info_t *thr = team->t.t_threads[i];
1106	if (thr->th.th_prev_num_threads != team->t.t_nproc \|\|
1107	thr->th.th_prev_level != team->t.t_level) {
1108	team->t.t_display_affinity = `1`;
1109	break;
1110	}
1111	}
1112	}
1113
1114	KMP_MB();
1115	}
1116
1117	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
1118	// Propagate any changes to the floating point control registers out to the team
1119	// We try to avoid unnecessary writes to the relevant cache line in the team
1120	// structure, so we don't make changes unless they are needed.
1121	inline static void propagateFPControl(kmp_team_t *team) {
1122	if (__kmp_inherit_fp_control) {
1123	kmp_int16 x87_fpu_control_word;
1124	kmp_uint32 mxcsr;
1125
1126	// Get primary thread's values of FPU control flags (both X87 and vector)
1127	__kmp_store_x87_fpu_control_word(p: &x87_fpu_control_word);
1128	__kmp_store_mxcsr(p: &mxcsr);
1129	mxcsr &= KMP_X86_MXCSR_MASK;
1130
1131	// There is no point looking at t_fp_control_saved here.
1132	// If it is TRUE, we still have to update the values if they are different
1133	// from those we now have. If it is FALSE we didn't save anything yet, but
1134	// our objective is the same. We have to ensure that the values in the team
1135	// are the same as those we have.
1136	// So, this code achieves what we need whether or not t_fp_control_saved is
1137	// true. By checking whether the value needs updating we avoid unnecessary
1138	// writes that would put the cache-line into a written state, causing all
1139	// threads in the team to have to read it again.
1140	KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1141	KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1142	// Although we don't use this value, other code in the runtime wants to know
1143	// whether it should restore them. So we must ensure it is correct.
1144	KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1145	} else {
1146	// Similarly here. Don't write to this cache-line in the team structure
1147	// unless we have to.
1148	KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1149	}
1150	}
1151
1152	// Do the opposite, setting the hardware registers to the updated values from
1153	// the team.
1154	inline static void updateHWFPControl(kmp_team_t *team) {
1155	if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1156	// Only reset the fp control regs if they have been changed in the team.
1157	// the parallel region that we are exiting.
1158	kmp_int16 x87_fpu_control_word;
1159	kmp_uint32 mxcsr;
1160	__kmp_store_x87_fpu_control_word(p: &x87_fpu_control_word);
1161	__kmp_store_mxcsr(p: &mxcsr);
1162	mxcsr &= KMP_X86_MXCSR_MASK;
1163
1164	if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1165	__kmp_clear_x87_fpu_status_word();
1166	__kmp_load_x87_fpu_control_word(p: &team->t.t_x87_fpu_control_word);
1167	}
1168
1169	if (team->t.t_mxcsr != mxcsr) {
1170	__kmp_load_mxcsr(p: &team->t.t_mxcsr);
1171	}
1172	}
1173	}
1174	#else
1175	#define propagateFPControl(x) ((void)0)
1176	#define updateHWFPControl(x) ((void)0)
1177	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
1178
1179	static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1180	int realloc); // forward declaration
1181
1182	/ Run a parallel region that has been serialized, so runs only in a team of the*
1183	single primary thread. /*
1184	void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1185	kmp_info_t *this_thr;
1186	kmp_team_t *serial_team;
1187
1188	KC_TRACE(`10`, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1189
1190	/ Skip all this code for autopar serialized loops since it results in*
1191	unacceptable overhead /*
1192	if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1193	return;
1194
1195	if (!TCR_4(__kmp_init_parallel))
1196	__kmp_parallel_initialize();
1197	__kmp_resume_if_soft_paused();
1198
1199	this_thr = __kmp_threads[global_tid];
1200	serial_team = this_thr->th.th_serial_team;
1201
1202	/ utilize the serialized team held by this thread /
1203	KMP_DEBUG_ASSERT(serial_team);
1204	KMP_MB();
1205
1206	kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1207	if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1208	proc_bind = proc_bind_false;
1209	} else if (proc_bind == proc_bind_default) {
1210	// No proc_bind clause was specified, so use the current value
1211	// of proc-bind-var for this parallel region.
1212	proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1213	}
1214	// Reset for next parallel region
1215	this_thr->th.th_set_proc_bind = proc_bind_default;
1216
1217	// Reset num_threads for next parallel region
1218	this_thr->th.th_set_nproc = `0`;
1219
1220	#if OMPT_SUPPORT
1221	ompt_data_t ompt_parallel_data = ompt_data_none;
1222	void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1223	if (ompt_enabled.enabled &&
1224	this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1225
1226	ompt_task_info_t *parent_task_info;
1227	parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1228
1229	parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(`0`);
1230	if (ompt_enabled.ompt_callback_parallel_begin) {
1231	int team_size = `1`;
1232
1233	ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1234	&(parent_task_info->task_data), &(parent_task_info->frame),
1235	&ompt_parallel_data, team_size,
1236	ompt_parallel_invoker_program \| ompt_parallel_team, codeptr);
1237	}
1238	}
1239	#endif // OMPT_SUPPORT
1240
1241	if (this_thr->th.th_team != serial_team) {
1242	// Nested level will be an index in the nested nthreads array
1243	int level = this_thr->th.th_team->t.t_level;
1244
1245	if (serial_team->t.t_serialized) {
1246	/ this serial team was already used*
1247	TODO increase performance by making this locks more specific /*
1248	kmp_team_t *new_team;
1249
1250	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
1251
1252	new_team =
1253	__kmp_allocate_team(root: this_thr->th.th_root, new_nproc: `1`, max_nproc: `1`,
1254	#if OMPT_SUPPORT
1255	ompt_parallel_data,
1256	#endif
1257	proc_bind, new_icvs: &this_thr->th.th_current_task->td_icvs,
1258	argc: `0` USE_NESTED_HOT_ARG(NULL));
1259	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
1260	KMP_ASSERT(new_team);
1261
1262	/ setup new serialized team and install it /
1263	new_team->t.t_threads[`0`] = this_thr;
1264	new_team->t.t_parent = this_thr->th.th_team;
1265	serial_team = new_team;
1266	this_thr->th.th_serial_team = serial_team;
1267
1268	KF_TRACE(
1269	`10`,
1270	("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1271	global_tid, serial_team));
1272
1273	/ TODO the above breaks the requirement that if we run out of resources,*
1274	then we can still guarantee that serialized teams are ok, since we may
1275	need to allocate a new one /*
1276	} else {
1277	KF_TRACE(
1278	`10`,
1279	("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1280	global_tid, serial_team));
1281	}
1282
1283	/ we have to initialize this serial team /
1284	KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1285	KMP_DEBUG_ASSERT(serial_team->t.t_threads[`0`] == this_thr);
1286	KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1287	serial_team->t.t_ident = loc;
1288	serial_team->t.t_serialized = `1`;
1289	serial_team->t.t_nproc = `1`;
1290	serial_team->t.t_parent = this_thr->th.th_team;
1291	if (this_thr->th.th_team->t.t_nested_nth)
1292	serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
1293	else
1294	serial_team->t.t_nested_nth = &__kmp_nested_nth;
1295	// Save previous team's task state on serial team structure
1296	serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
1297	serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1298	this_thr->th.th_team = serial_team;
1299	serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1300
1301	KF_TRACE(`10`, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1302	this_thr->th.th_current_task));
1303	KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == `1`);
1304	this_thr->th.th_current_task->td_flags.executing = `0`;
1305
1306	__kmp_push_current_task_to_thread(this_thr, team: serial_team, tid: `0`);
1307
1308	/ TODO: GEH: do ICVs work for nested serialized teams? Don't we need an*
1309	implicit task for each serialized task represented by
1310	team->t.t_serialized? /*
1311	copy_icvs(dst: &this_thr->th.th_current_task->td_icvs,
1312	src: &this_thr->th.th_current_task->td_parent->td_icvs);
1313
1314	// Thread value exists in the nested nthreads array for the next nested
1315	// level
1316	kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
1317	if (this_thr->th.th_team->t.t_nested_nth)
1318	nested_nth = this_thr->th.th_team->t.t_nested_nth;
1319	if (nested_nth->used && (level + `1` < nested_nth->used)) {
1320	this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + `1`];
1321	}
1322
1323	if (__kmp_nested_proc_bind.used &&
1324	(level + `1` < __kmp_nested_proc_bind.used)) {
1325	this_thr->th.th_current_task->td_icvs.proc_bind =
1326	__kmp_nested_proc_bind.bind_types[level + `1`];
1327	}
1328
1329	#if USE_DEBUGGER
1330	serial_team->t.t_pkfn = (microtask_t)(~`0`); // For the debugger.
1331	#endif
1332	this_thr->th.th_info.ds.ds_tid = `0`;
1333
1334	/ set thread cache values /
1335	this_thr->th.th_team_nproc = `1`;
1336	this_thr->th.th_team_master = this_thr;
1337	this_thr->th.th_team_serialized = `1`;
1338	this_thr->th.th_task_team = NULL;
1339	this_thr->th.th_task_state = `0`;
1340
1341	serial_team->t.t_level = serial_team->t.t_parent->t.t_level + `1`;
1342	serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1343	serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1344
1345	propagateFPControl(team: serial_team);
1346
1347	/ check if we need to allocate dispatch buffers stack /
1348	KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1349	if (!serial_team->t.t_dispatch->th_disp_buffer) {
1350	serial_team->t.t_dispatch->th_disp_buffer =
1351	(dispatch_private_info_t *)__kmp_allocate(
1352	sizeof(dispatch_private_info_t));
1353	}
1354	this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1355
1356	KMP_MB();
1357
1358	} else {
1359	/ this serialized team is already being used,*
1360	* that's fine, just add another nested level */
1361	KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1362	KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1363	KMP_DEBUG_ASSERT(serial_team->t.t_threads[`0`] == this_thr);
1364	++serial_team->t.t_serialized;
1365	this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1366
1367	// Nested level will be an index in the nested nthreads array
1368	int level = this_thr->th.th_team->t.t_level;
1369	// Thread value exists in the nested nthreads array for the next nested
1370	// level
1371
1372	kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
1373	if (serial_team->t.t_nested_nth)
1374	nested_nth = serial_team->t.t_nested_nth;
1375	if (nested_nth->used && (level + `1` < nested_nth->used)) {
1376	this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + `1`];
1377	}
1378
1379	serial_team->t.t_level++;
1380	KF_TRACE(`10`, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1381	"of serial team %p to %d\n",
1382	global_tid, serial_team, serial_team->t.t_level));
1383
1384	/ allocate/push dispatch buffers stack /
1385	KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1386	{
1387	dispatch_private_info_t *disp_buffer =
1388	(dispatch_private_info_t *)__kmp_allocate(
1389	sizeof(dispatch_private_info_t));
1390	disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1391	serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1392	}
1393	this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1394
1395	/ allocate/push task team stack /
1396	__kmp_push_task_team_node(thread: this_thr, team: serial_team);
1397
1398	KMP_MB();
1399	}
1400	KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1401
1402	// Perform the display affinity functionality for
1403	// serialized parallel regions
1404	if (__kmp_display_affinity) {
1405	if (this_thr->th.th_prev_level != serial_team->t.t_level \|\|
1406	this_thr->th.th_prev_num_threads != `1`) {
1407	// NULL means use the affinity-format-var ICV
1408	__kmp_aux_display_affinity(gtid: global_tid, NULL);
1409	this_thr->th.th_prev_level = serial_team->t.t_level;
1410	this_thr->th.th_prev_num_threads = `1`;
1411	}
1412	}
1413
1414	if (__kmp_env_consistency_check)
1415	__kmp_push_parallel(gtid: global_tid, NULL);
1416	#if OMPT_SUPPORT
1417	serial_team->t.ompt_team_info.master_return_address = codeptr;
1418	if (ompt_enabled.enabled &&
1419	this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1420	OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1421	OMPT_GET_FRAME_ADDRESS(`0`);
1422
1423	ompt_lw_taskteam_t lw_taskteam;
1424	__ompt_lw_taskteam_init(lwt: &lw_taskteam, thr: this_thr, gtid: global_tid,
1425	ompt_pid: &ompt_parallel_data, codeptr);
1426
1427	__ompt_lw_taskteam_link(lwt: &lw_taskteam, thr: this_thr, on_heap: `1`);
1428	// don't use lw_taskteam after linking. content was swaped
1429
1430	/ OMPT implicit task begin /
1431	if (ompt_enabled.ompt_callback_implicit_task) {
1432	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1433	ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1434	OMPT_CUR_TASK_DATA(this_thr), `1`, __kmp_tid_from_gtid(gtid: global_tid),
1435	ompt_task_implicit); // TODO: Can this be ompt_task_initial?
1436	OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1437	__kmp_tid_from_gtid(gtid: global_tid);
1438	}
1439
1440	/ OMPT state /
1441	this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1442	OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1443	OMPT_GET_FRAME_ADDRESS(`0`);
1444	}
1445	#endif
1446	}
1447
1448	// Test if this fork is for a team closely nested in a teams construct
1449	static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1450	microtask_t microtask, int level,
1451	int teams_level, kmp_va_list ap) {
1452	return (master_th->th.th_teams_microtask && ap &&
1453	microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1454	}
1455
1456	// Test if this fork is for the teams construct, i.e. to form the outer league
1457	// of teams
1458	static inline bool __kmp_is_entering_teams(int active_level, int level,
1459	int teams_level, kmp_va_list ap) {
1460	return ((ap == NULL && active_level == `0`) \|\|
1461	(ap && teams_level > `0` && teams_level == level));
1462	}
1463
1464	// AC: This is start of parallel that is nested inside teams construct.
1465	// The team is actual (hot), all workers are ready at the fork barrier.
1466	// No lock needed to initialize the team a bit, then free workers.
1467	static inline int
1468	__kmp_fork_in_teams(ident_t loc, int* gtid, kmp_team_t *parent_team,
1469	kmp_int32 argc, kmp_info_t master_th, kmp_root_t root,
1470	enum fork_context_e call_context, microtask_t microtask,
1471	launch_t invoker, int master_set_numthreads, int level,
1472	#if OMPT_SUPPORT
1473	ompt_data_t ompt_parallel_data, void *return_address,
1474	#endif
1475	kmp_va_list ap) {
1476	void **argv;
1477	int i;
1478
1479	parent_team->t.t_ident = loc;
1480	__kmp_alloc_argv_entries(argc, team: parent_team, TRUE);
1481	parent_team->t.t_argc = argc;
1482	argv = (void **)parent_team->t.t_argv;
1483	for (i = argc - `1`; i >= `0`; --i) {
1484	argv++ = va_arg(kmp_va_deref(ap), void* *);
1485	}
1486	// Increment our nested depth levels, but not increase the serialization
1487	if (parent_team == master_th->th.th_serial_team) {
1488	// AC: we are in serialized parallel
1489	__kmpc_serialized_parallel(loc, global_tid: gtid);
1490	KMP_DEBUG_ASSERT(parent_team->t.t_serialized > `1`);
1491
1492	if (call_context == fork_context_gnu) {
1493	// AC: need to decrement t_serialized for enquiry functions to work
1494	// correctly, will restore at join time
1495	parent_team->t.t_serialized--;
1496	return TRUE;
1497	}
1498
1499	#if OMPD_SUPPORT
1500	parent_team->t.t_pkfn = microtask;
1501	#endif
1502
1503	#if OMPT_SUPPORT
1504	void *dummy;
1505	void **exit_frame_p;
1506	ompt_data_t *implicit_task_data;
1507	ompt_lw_taskteam_t lw_taskteam;
1508
1509	if (ompt_enabled.enabled) {
1510	__ompt_lw_taskteam_init(lwt: &lw_taskteam, thr: master_th, gtid,
1511	ompt_pid: &ompt_parallel_data, codeptr: return_address);
1512	exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1513
1514	__ompt_lw_taskteam_link(lwt: &lw_taskteam, thr: master_th, on_heap: `0`);
1515	// Don't use lw_taskteam after linking. Content was swapped.
1516
1517	/ OMPT implicit task begin /
1518	implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1519	if (ompt_enabled.ompt_callback_implicit_task) {
1520	OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1521	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1522	ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1523	`1`, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1524	}
1525
1526	/ OMPT state /
1527	master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1528	} else {
1529	exit_frame_p = &dummy;
1530	}
1531	#endif
1532
1533	// AC: need to decrement t_serialized for enquiry functions to work
1534	// correctly, will restore at join time
1535	parent_team->t.t_serialized--;
1536
1537	{
1538	KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1539	KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1540	__kmp_invoke_microtask(pkfn: microtask, gtid, npr: `0`, argc, argv: parent_team->t.t_argv
1541	#if OMPT_SUPPORT
1542	,
1543	exit_frame_ptr: exit_frame_p
1544	#endif
1545	);
1546	}
1547
1548	#if OMPT_SUPPORT
1549	if (ompt_enabled.enabled) {
1550	*exit_frame_p = NULL;
1551	OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1552	if (ompt_enabled.ompt_callback_implicit_task) {
1553	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1554	ompt_scope_end, NULL, implicit_task_data, `1`,
1555	OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1556	}
1557	ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1558	__ompt_lw_taskteam_unlink(thr: master_th);
1559	if (ompt_enabled.ompt_callback_parallel_end) {
1560	ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1561	&ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1562	OMPT_INVOKER(call_context) \| ompt_parallel_team, return_address);
1563	}
1564	master_th->th.ompt_thread_info.state = ompt_state_overhead;
1565	}
1566	#endif
1567	return TRUE;
1568	}
1569
1570	parent_team->t.t_pkfn = microtask;
1571	parent_team->t.t_invoke = invoker;
1572	KMP_ATOMIC_INC(&root->r.r_in_parallel);
1573	parent_team->t.t_active_level++;
1574	parent_team->t.t_level++;
1575	parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1576
1577	// If the threads allocated to the team are less than the thread limit, update
1578	// the thread limit here. th_teams_size.nth is specific to this team nested
1579	// in a teams construct, the team is fully created, and we're about to do
1580	// the actual fork. Best to do this here so that the subsequent uses below
1581	// and in the join have the correct value.
1582	master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1583
1584	#if OMPT_SUPPORT
1585	if (ompt_enabled.enabled) {
1586	ompt_lw_taskteam_t lw_taskteam;
1587	__ompt_lw_taskteam_init(lwt: &lw_taskteam, thr: master_th, gtid, ompt_pid: &ompt_parallel_data,
1588	codeptr: return_address);
1589	__ompt_lw_taskteam_link(lwt: &lw_taskteam, thr: master_th, on_heap: `1`, always: true);
1590	}
1591	#endif
1592
1593	/ Change number of threads in the team if requested /
1594	if (master_set_numthreads) { // The parallel has num_threads clause
1595	if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1596	// AC: only can reduce number of threads dynamically, can't increase
1597	kmp_info_t **other_threads = parent_team->t.t_threads;
1598	// NOTE: if using distributed barrier, we need to run this code block
1599	// even when the team size appears not to have changed from the max.
1600	int old_proc = master_th->th.th_teams_size.nth;
1601	if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1602	__kmp_resize_dist_barrier(team: parent_team, old_nthreads: old_proc, new_nthreads: master_set_numthreads);
1603	__kmp_add_threads_to_team(team: parent_team, new_nthreads: master_set_numthreads);
1604	}
1605	parent_team->t.t_nproc = master_set_numthreads;
1606	for (i = `0`; i < master_set_numthreads; ++i) {
1607	other_threads[i]->th.th_team_nproc = master_set_numthreads;
1608	}
1609	}
1610	// Keep extra threads hot in the team for possible next parallels
1611	master_th->th.th_set_nproc = `0`;
1612	}
1613
1614	#if USE_DEBUGGER
1615	if (__kmp_debugging) { // Let debugger override number of threads.
1616	int nth = __kmp_omp_num_threads(loc);
1617	if (nth > `0`) { // 0 means debugger doesn't want to change num threads
1618	master_set_numthreads = nth;
1619	}
1620	}
1621	#endif
1622
1623	// Figure out the proc_bind policy for the nested parallel within teams
1624	kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1625	// proc_bind_default means don't update
1626	kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1627	if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1628	proc_bind = proc_bind_false;
1629	} else {
1630	// No proc_bind clause specified; use current proc-bind-var
1631	if (proc_bind == proc_bind_default) {
1632	proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1633	}
1634	/ else: The proc_bind policy was specified explicitly on parallel clause.*
1635	This overrides proc-bind-var for this parallel region, but does not
1636	change proc-bind-var. /*
1637	// Figure the value of proc-bind-var for the child threads.
1638	if ((level + `1` < __kmp_nested_proc_bind.used) &&
1639	(__kmp_nested_proc_bind.bind_types[level + `1`] !=
1640	master_th->th.th_current_task->td_icvs.proc_bind)) {
1641	proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + `1`];
1642	}
1643	}
1644	KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1645	// Need to change the bind-var ICV to correct value for each implicit task
1646	if (proc_bind_icv != proc_bind_default &&
1647	master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1648	kmp_info_t **other_threads = parent_team->t.t_threads;
1649	for (i = `0`; i < master_th->th.th_team_nproc; ++i) {
1650	other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1651	}
1652	}
1653	// Reset for next parallel region
1654	master_th->th.th_set_proc_bind = proc_bind_default;
1655
1656	#if USE_ITT_BUILD && USE_ITT_NOTIFY
1657	if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) \|\|
1658	KMP_ITT_DEBUG) &&
1659	__kmp_forkjoin_frames_mode == `3` &&
1660	parent_team->t.t_active_level == `1` // only report frames at level 1
1661	&& master_th->th.th_teams_size.nteams == `1`) {
1662	kmp_uint64 tmp_time = __itt_get_timestamp();
1663	master_th->th.th_frame_time = tmp_time;
1664	parent_team->t.t_region_time = tmp_time;
1665	}
1666	if (__itt_stack_caller_create_ptr) {
1667	KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1668	// create new stack stitching id before entering fork barrier
1669	parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1670	}
1671	#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1672	#if KMP_AFFINITY_SUPPORTED
1673	__kmp_partition_places(team: parent_team);
1674	#endif
1675
1676	KF_TRACE(`10`, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1677	"master_th=%p, gtid=%d\n",
1678	root, parent_team, master_th, gtid));
1679	__kmp_internal_fork(id: loc, gtid, team: parent_team);
1680	KF_TRACE(`10`, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1681	"master_th=%p, gtid=%d\n",
1682	root, parent_team, master_th, gtid));
1683
1684	if (call_context == fork_context_gnu)
1685	return TRUE;
1686
1687	/ Invoke microtask for PRIMARY thread /
1688	KA_TRACE(`20`, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1689	parent_team->t.t_id, parent_team->t.t_pkfn));
1690
1691	if (!parent_team->t.t_invoke(gtid)) {
1692	KMP_ASSERT2(`0`, "cannot invoke microtask for PRIMARY thread");
1693	}
1694	KA_TRACE(`20`, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1695	parent_team->t.t_id, parent_team->t.t_pkfn));
1696	KMP_MB(); / Flush all pending memory write invalidates. /
1697
1698	KA_TRACE(`20`, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
1699
1700	return TRUE;
1701	}
1702
1703	// Create a serialized parallel region
1704	static inline int
1705	__kmp_serial_fork_call(ident_t loc, int* gtid, enum fork_context_e call_context,
1706	kmp_int32 argc, microtask_t microtask, launch_t invoker,
1707	kmp_info_t master_th, kmp_team_t parent_team,
1708	#if OMPT_SUPPORT
1709	ompt_data_t ompt_parallel_data, void* **return_address,
1710	ompt_data_t **parent_task_data,
1711	#endif
1712	kmp_va_list ap) {
1713	kmp_team_t *team;
1714	int i;
1715	void **argv;
1716
1717	/ josh todo: hypothetical question: what do we do for OS X? /*
1718	#if KMP_OS_LINUX && \
1719	(KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 \|\| KMP_ARCH_ARM \|\| KMP_ARCH_AARCH64)
1720	SimpleVLA<void *> args(argc);
1721	#else
1722	void *args = (void* *)KMP_ALLOCA(argc sizeof(void *));
1723	#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 \|\| KMP_ARCH_ARM \|\| \
1724	KMP_ARCH_AARCH64) */
1725
1726	KA_TRACE(
1727	`20`, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1728
1729	__kmpc_serialized_parallel(loc, global_tid: gtid);
1730
1731	#if OMPD_SUPPORT
1732	master_th->th.th_serial_team->t.t_pkfn = microtask;
1733	#endif
1734
1735	if (call_context == fork_context_intel) {
1736	/ TODO this sucks, use the compiler itself to pass args! :) /
1737	master_th->th.th_serial_team->t.t_ident = loc;
1738	if (!ap) {
1739	// revert change made in __kmpc_serialized_parallel()
1740	master_th->th.th_serial_team->t.t_level--;
1741	// Get args from parent team for teams construct
1742
1743	#if OMPT_SUPPORT
1744	void *dummy;
1745	void **exit_frame_p;
1746	ompt_task_info_t *task_info;
1747	ompt_lw_taskteam_t lw_taskteam;
1748
1749	if (ompt_enabled.enabled) {
1750	__ompt_lw_taskteam_init(lwt: &lw_taskteam, thr: master_th, gtid,
1751	ompt_pid: ompt_parallel_data, codeptr: *return_address);
1752
1753	__ompt_lw_taskteam_link(lwt: &lw_taskteam, thr: master_th, on_heap: `0`);
1754	// don't use lw_taskteam after linking. content was swaped
1755	task_info = OMPT_CUR_TASK_INFO(master_th);
1756	exit_frame_p = &(task_info->frame.exit_frame.ptr);
1757	if (ompt_enabled.ompt_callback_implicit_task) {
1758	OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1759	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1760	ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1761	&(task_info->task_data), `1`,
1762	OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1763	}
1764
1765	/ OMPT state /
1766	master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1767	} else {
1768	exit_frame_p = &dummy;
1769	}
1770	#endif
1771
1772	{
1773	KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1774	KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1775	__kmp_invoke_microtask(pkfn: microtask, gtid, npr: `0`, argc, argv: parent_team->t.t_argv
1776	#if OMPT_SUPPORT
1777	,
1778	exit_frame_ptr: exit_frame_p
1779	#endif
1780	);
1781	}
1782
1783	#if OMPT_SUPPORT
1784	if (ompt_enabled.enabled) {
1785	*exit_frame_p = NULL;
1786	if (ompt_enabled.ompt_callback_implicit_task) {
1787	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1788	ompt_scope_end, NULL, &(task_info->task_data), `1`,
1789	OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1790	}
1791	ompt_parallel_data = OMPT_CUR_TEAM_DATA(master_th);
1792	__ompt_lw_taskteam_unlink(thr: master_th);
1793	if (ompt_enabled.ompt_callback_parallel_end) {
1794	ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1795	ompt_parallel_data, *parent_task_data,
1796	OMPT_INVOKER(call_context) \| ompt_parallel_team, *return_address);
1797	}
1798	master_th->th.ompt_thread_info.state = ompt_state_overhead;
1799	}
1800	#endif
1801	} else if (microtask == (microtask_t)__kmp_teams_master) {
1802	KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1803	team = master_th->th.th_team;
1804	// team->t.t_pkfn = microtask;
1805	team->t.t_invoke = invoker;
1806	__kmp_alloc_argv_entries(argc, team, TRUE);
1807	team->t.t_argc = argc;
1808	argv = (void **)team->t.t_argv;
1809	for (i = argc - `1`; i >= `0`; --i)
1810	argv++ = va_arg(kmp_va_deref(ap), void* *);
1811	// AC: revert change made in __kmpc_serialized_parallel()
1812	// because initial code in teams should have level=0
1813	team->t.t_level--;
1814	// AC: call special invoker for outer "parallel" of teams construct
1815	invoker(gtid);
1816	#if OMPT_SUPPORT
1817	if (ompt_enabled.enabled) {
1818	ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1819	if (ompt_enabled.ompt_callback_implicit_task) {
1820	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1821	ompt_scope_end, NULL, &(task_info->task_data), `0`,
1822	OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1823	}
1824	if (ompt_enabled.ompt_callback_parallel_end) {
1825	ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1826	ompt_parallel_data, *parent_task_data,
1827	OMPT_INVOKER(call_context) \| ompt_parallel_league,
1828	*return_address);
1829	}
1830	master_th->th.ompt_thread_info.state = ompt_state_overhead;
1831	}
1832	#endif
1833	} else {
1834	argv = args;
1835	for (i = argc - `1`; i >= `0`; --i)
1836	argv++ = va_arg(kmp_va_deref(ap), void* *);
1837	KMP_MB();
1838
1839	#if OMPT_SUPPORT
1840	void *dummy;
1841	void **exit_frame_p;
1842	ompt_task_info_t *task_info;
1843	ompt_lw_taskteam_t lw_taskteam;
1844	ompt_data_t *implicit_task_data;
1845
1846	if (ompt_enabled.enabled) {
1847	__ompt_lw_taskteam_init(lwt: &lw_taskteam, thr: master_th, gtid,
1848	ompt_pid: ompt_parallel_data, codeptr: *return_address);
1849	__ompt_lw_taskteam_link(lwt: &lw_taskteam, thr: master_th, on_heap: `0`);
1850	// don't use lw_taskteam after linking. content was swaped
1851	task_info = OMPT_CUR_TASK_INFO(master_th);
1852	exit_frame_p = &(task_info->frame.exit_frame.ptr);
1853
1854	/ OMPT implicit task begin /
1855	implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1856	if (ompt_enabled.ompt_callback_implicit_task) {
1857	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1858	ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1859	implicit_task_data, `1`, __kmp_tid_from_gtid(gtid),
1860	ompt_task_implicit);
1861	OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1862	}
1863
1864	/ OMPT state /
1865	master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1866	} else {
1867	exit_frame_p = &dummy;
1868	}
1869	#endif
1870
1871	{
1872	KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1873	KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1874	__kmp_invoke_microtask(pkfn: microtask, gtid, npr: `0`, argc, argv: args
1875	#if OMPT_SUPPORT
1876	,
1877	exit_frame_ptr: exit_frame_p
1878	#endif
1879	);
1880	}
1881
1882	#if OMPT_SUPPORT
1883	if (ompt_enabled.enabled) {
1884	*exit_frame_p = NULL;
1885	if (ompt_enabled.ompt_callback_implicit_task) {
1886	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1887	ompt_scope_end, NULL, &(task_info->task_data), `1`,
1888	OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1889	}
1890
1891	ompt_parallel_data = OMPT_CUR_TEAM_DATA(master_th);
1892	__ompt_lw_taskteam_unlink(thr: master_th);
1893	if (ompt_enabled.ompt_callback_parallel_end) {
1894	ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1895	ompt_parallel_data, *parent_task_data,
1896	OMPT_INVOKER(call_context) \| ompt_parallel_team, *return_address);
1897	}
1898	master_th->th.ompt_thread_info.state = ompt_state_overhead;
1899	}
1900	#endif
1901	}
1902	} else if (call_context == fork_context_gnu) {
1903	#if OMPT_SUPPORT
1904	if (ompt_enabled.enabled) {
1905	ompt_lw_taskteam_t lwt;
1906	__ompt_lw_taskteam_init(lwt: &lwt, thr: master_th, gtid, ompt_pid: ompt_parallel_data,
1907	codeptr: *return_address);
1908
1909	lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1910	__ompt_lw_taskteam_link(lwt: &lwt, thr: master_th, on_heap: `1`);
1911	}
1912	// don't use lw_taskteam after linking. content was swaped
1913	#endif
1914
1915	// we were called from GNU native code
1916	KA_TRACE(`20`, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1917	return FALSE;
1918	} else {
1919	KMP_ASSERT2(call_context < fork_context_last,
1920	"__kmp_serial_fork_call: unknown fork_context parameter");
1921	}
1922
1923	KA_TRACE(`20`, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1924	KMP_MB();
1925	return FALSE;
1926	}
1927
1928	/ most of the work for a fork /
1929	/ return true if we really went parallel, false if serialized /
1930	int __kmp_fork_call(ident_t loc, int* gtid,
1931	enum fork_context_e call_context, // Intel, GNU, ...
1932	kmp_int32 argc, microtask_t microtask, launch_t invoker,
1933	kmp_va_list ap) {
1934	void **argv;
1935	int i;
1936	int master_tid;
1937	int master_this_cons;
1938	kmp_team_t *team;
1939	kmp_team_t *parent_team;
1940	kmp_info_t *master_th;
1941	kmp_root_t *root;
1942	int nthreads;
1943	int master_active;
1944	int master_set_numthreads;
1945	int task_thread_limit = `0`;
1946	int level;
1947	int active_level;
1948	int teams_level;
1949	#if KMP_NESTED_HOT_TEAMS
1950	kmp_hot_team_ptr_t **p_hot_teams;
1951	#endif
1952	{ // KMP_TIME_BLOCK
1953	KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1954	KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
1955
1956	KA_TRACE(`20`, ("__kmp_fork_call: enter T#%d\n", gtid));
1957	if (__kmp_stkpadding > `0` && __kmp_root[gtid] != NULL) {
1958	/ Some systems prefer the stack for the root thread(s) to start with /
1959	/ some gap from the parent stack to prevent false sharing. /
1960	void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1961	/ These 2 lines below are so this does not get optimized out /
1962	if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1963	__kmp_stkpadding += (short)((kmp_int64)dummy);
1964	}
1965
1966	/ initialize if needed /
1967	KMP_DEBUG_ASSERT(
1968	__kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1969	if (!TCR_4(__kmp_init_parallel))
1970	__kmp_parallel_initialize();
1971	__kmp_resume_if_soft_paused();
1972
1973	/ setup current data /
1974	// AC: potentially unsafe, not in sync with library shutdown,
1975	// __kmp_threads can be freed
1976	master_th = __kmp_threads[gtid];
1977
1978	parent_team = master_th->th.th_team;
1979	master_tid = master_th->th.th_info.ds.ds_tid;
1980	master_this_cons = master_th->th.th_local.this_construct;
1981	root = master_th->th.th_root;
1982	master_active = root->r.r_active;
1983	master_set_numthreads = master_th->th.th_set_nproc;
1984	task_thread_limit =
1985	master_th->th.th_current_task->td_icvs.task_thread_limit;
1986
1987	#if OMPT_SUPPORT
1988	ompt_data_t ompt_parallel_data = ompt_data_none;
1989	ompt_data_t *parent_task_data = NULL;
1990	ompt_frame_t *ompt_frame = NULL;
1991	void *return_address = NULL;
1992
1993	if (ompt_enabled.enabled) {
1994	__ompt_get_task_info_internal(ancestor_level: `0`, NULL, task_data: &parent_task_data, task_frame: &ompt_frame,
1995	NULL, NULL);
1996	return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1997	}
1998	#endif
1999
2000	// Assign affinity to root thread if it hasn't happened yet
2001	__kmp_assign_root_init_mask();
2002
2003	// Nested level will be an index in the nested nthreads array
2004	level = parent_team->t.t_level;
2005	// used to launch non-serial teams even if nested is not allowed
2006	active_level = parent_team->t.t_active_level;
2007	// needed to check nesting inside the teams
2008	teams_level = master_th->th.th_teams_level;
2009	#if KMP_NESTED_HOT_TEAMS
2010	p_hot_teams = &master_th->th.th_hot_teams;
2011	if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > `0`) {
2012	p_hot_teams = (kmp_hot_team_ptr_t )__kmp_allocate(
2013	sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
2014	(*p_hot_teams)[`0`].hot_team = root->r.r_hot_team;
2015	// it is either actual or not needed (when active_level > 0)
2016	(*p_hot_teams)[`0`].hot_team_nth = `1`;
2017	}
2018	#endif
2019
2020	#if OMPT_SUPPORT
2021	if (ompt_enabled.enabled) {
2022	if (ompt_enabled.ompt_callback_parallel_begin) {
2023	int team_size = master_set_numthreads
2024	? master_set_numthreads
2025	: get__nproc_2(parent_team, master_tid);
2026	int flags = OMPT_INVOKER(call_context) \|
2027	((microtask == (microtask_t)__kmp_teams_master)
2028	? ompt_parallel_league
2029	: ompt_parallel_team);
2030	ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
2031	parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
2032	return_address);
2033	}
2034	master_th->th.ompt_thread_info.state = ompt_state_overhead;
2035	}
2036	#endif
2037
2038	master_th->th.th_ident = loc;
2039
2040	// Parallel closely nested in teams construct:
2041	if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
2042	return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
2043	call_context, microtask, invoker,
2044	master_set_numthreads, level,
2045	#if OMPT_SUPPORT
2046	ompt_parallel_data, return_address,
2047	#endif
2048	ap);
2049	} // End parallel closely nested in teams construct
2050
2051	// Need this to happen before we determine the number of threads, not while
2052	// we are allocating the team
2053	//__kmp_push_current_task_to_thread(master_th, parent_team, 0);
2054
2055	KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);
2056
2057	// Determine the number of threads
2058	int enter_teams =
2059	__kmp_is_entering_teams(active_level, level, teams_level, ap);
2060	if ((!enter_teams &&
2061	(parent_team->t.t_active_level >=
2062	master_th->th.th_current_task->td_icvs.max_active_levels)) \|\|
2063	(__kmp_library == library_serial)) {
2064	KC_TRACE(`10`, ("__kmp_fork_call: T#%d serializing team\n", gtid));
2065	nthreads = `1`;
2066	} else {
2067	nthreads = master_set_numthreads
2068	? master_set_numthreads
2069	// TODO: get nproc directly from current task
2070	: get__nproc_2(parent_team, master_tid);
2071	// Use the thread_limit set for the current target task if exists, else go
2072	// with the deduced nthreads
2073	nthreads = task_thread_limit > `0` && task_thread_limit < nthreads
2074	? task_thread_limit
2075	: nthreads;
2076	// Check if we need to take forkjoin lock? (no need for serialized
2077	// parallel out of teams construct).
2078	if (nthreads > `1`) {
2079	/ determine how many new threads we can use /
2080	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
2081	/ AC: If we execute teams from parallel region (on host), then teams*
2082	should be created but each can only have 1 thread if nesting is
2083	disabled. If teams called from serial region, then teams and their
2084	threads should be created regardless of the nesting setting. /*
2085	nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2086	set_nthreads: nthreads, enter_teams);
2087	if (nthreads == `1`) {
2088	// Free lock for single thread execution here; for multi-thread
2089	// execution it will be freed later after team of threads created
2090	// and initialized
2091	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
2092	}
2093	}
2094	}
2095	KMP_DEBUG_ASSERT(nthreads > `0`);
2096
2097	// If we temporarily changed the set number of threads then restore it now
2098	master_th->th.th_set_nproc = `0`;
2099
2100	if (nthreads == `1`) {
2101	return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2102	invoker, master_th, parent_team,
2103	#if OMPT_SUPPORT
2104	ompt_parallel_data: &ompt_parallel_data, return_address: &return_address,
2105	parent_task_data: &parent_task_data,
2106	#endif
2107	ap);
2108	} // if (nthreads == 1)
2109
2110	// GEH: only modify the executing flag in the case when not serialized
2111	// serialized case is handled in kmpc_serialized_parallel
2112	KF_TRACE(`10`, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2113	"curtask=%p, curtask_max_aclevel=%d\n",
2114	parent_team->t.t_active_level, master_th,
2115	master_th->th.th_current_task,
2116	master_th->th.th_current_task->td_icvs.max_active_levels));
2117	// TODO: GEH - cannot do this assertion because root thread not set up as
2118	// executing
2119	// KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2120	master_th->th.th_current_task->td_flags.executing = `0`;
2121
2122	if (!master_th->th.th_teams_microtask \|\| level > teams_level) {
2123	/ Increment our nested depth level /
2124	KMP_ATOMIC_INC(&root->r.r_in_parallel);
2125	}
2126
2127	// See if we need to make a copy of the ICVs.
2128	int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2129	kmp_nested_nthreads_t *nested_nth = NULL;
2130	if (!master_th->th.th_set_nested_nth &&
2131	(level + `1` < parent_team->t.t_nested_nth->used) &&
2132	(parent_team->t.t_nested_nth->nth[level + `1`] != nthreads_icv)) {
2133	nthreads_icv = parent_team->t.t_nested_nth->nth[level + `1`];
2134	} else if (master_th->th.th_set_nested_nth) {
2135	nested_nth = __kmp_override_nested_nth(thr: master_th, level);
2136	if ((level + `1` < nested_nth->used) &&
2137	(nested_nth->nth[level + `1`] != nthreads_icv))
2138	nthreads_icv = nested_nth->nth[level + `1`];
2139	else
2140	nthreads_icv = `0`; // don't update
2141	} else {
2142	nthreads_icv = `0`; // don't update
2143	}
2144
2145	// Figure out the proc_bind_policy for the new team.
2146	kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2147	// proc_bind_default means don't update
2148	kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2149	if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2150	proc_bind = proc_bind_false;
2151	} else {
2152	// No proc_bind clause specified; use current proc-bind-var for this
2153	// parallel region
2154	if (proc_bind == proc_bind_default) {
2155	proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2156	}
2157	// Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2158	if (master_th->th.th_teams_microtask &&
2159	microtask == (microtask_t)__kmp_teams_master) {
2160	proc_bind = __kmp_teams_proc_bind;
2161	}
2162	/ else: The proc_bind policy was specified explicitly on parallel clause.*
2163	This overrides proc-bind-var for this parallel region, but does not
2164	change proc-bind-var. /*
2165	// Figure the value of proc-bind-var for the child threads.
2166	if ((level + `1` < __kmp_nested_proc_bind.used) &&
2167	(__kmp_nested_proc_bind.bind_types[level + `1`] !=
2168	master_th->th.th_current_task->td_icvs.proc_bind)) {
2169	// Do not modify the proc bind icv for the two teams construct forks
2170	// They just let the proc bind icv pass through
2171	if (!master_th->th.th_teams_microtask \|\|
2172	!(microtask == (microtask_t)__kmp_teams_master \|\| ap == NULL))
2173	proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + `1`];
2174	}
2175	}
2176
2177	// Reset for next parallel region
2178	master_th->th.th_set_proc_bind = proc_bind_default;
2179
2180	if ((nthreads_icv > `0`) \|\| (proc_bind_icv != proc_bind_default)) {
2181	kmp_internal_control_t new_icvs;
2182	copy_icvs(dst: &new_icvs, src: &master_th->th.th_current_task->td_icvs);
2183	new_icvs.next = NULL;
2184	if (nthreads_icv > `0`) {
2185	new_icvs.nproc = nthreads_icv;
2186	}
2187	if (proc_bind_icv != proc_bind_default) {
2188	new_icvs.proc_bind = proc_bind_icv;
2189	}
2190
2191	/ allocate a new parallel team /
2192	KF_TRACE(`10`, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2193	team = __kmp_allocate_team(root, new_nproc: nthreads, max_nproc: nthreads,
2194	#if OMPT_SUPPORT
2195	ompt_parallel_data,
2196	#endif
2197	proc_bind, new_icvs: &new_icvs,
2198	argc USE_NESTED_HOT_ARG(master_th));
2199	if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2200	copy_icvs(dst: (kmp_internal_control_t *)team->t.b->team_icvs, src: &new_icvs);
2201	} else {
2202	/ allocate a new parallel team /
2203	KF_TRACE(`10`, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2204	team = __kmp_allocate_team(root, new_nproc: nthreads, max_nproc: nthreads,
2205	#if OMPT_SUPPORT
2206	ompt_parallel_data,
2207	#endif
2208	proc_bind,
2209	new_icvs: &master_th->th.th_current_task->td_icvs,
2210	argc USE_NESTED_HOT_ARG(master_th));
2211	if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2212	copy_icvs(dst: (kmp_internal_control_t *)team->t.b->team_icvs,
2213	src: &master_th->th.th_current_task->td_icvs);
2214	}
2215	KF_TRACE(
2216	`10`, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2217
2218	/ setup the new team /
2219	KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2220	KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2221	KMP_CHECK_UPDATE(team->t.t_ident, loc);
2222	KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2223	KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2224	#if OMPT_SUPPORT
2225	KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2226	return_address);
2227	#endif
2228	KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2229	// TODO: parent_team->t.t_level == INT_MAX ???
2230	if (!master_th->th.th_teams_microtask \|\| level > teams_level) {
2231	int new_level = parent_team->t.t_level + `1`;
2232	KMP_CHECK_UPDATE(team->t.t_level, new_level);
2233	new_level = parent_team->t.t_active_level + `1`;
2234	KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2235	} else {
2236	// AC: Do not increase parallel level at start of the teams construct
2237	int new_level = parent_team->t.t_level;
2238	KMP_CHECK_UPDATE(team->t.t_level, new_level);
2239	new_level = parent_team->t.t_active_level;
2240	KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2241	}
2242	kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2243	// set primary thread's schedule as new run-time schedule
2244	KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2245
2246	KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2247	KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2248
2249	// Check if hot team has potentially outdated list, and if so, free it
2250	if (team->t.t_nested_nth &&
2251	team->t.t_nested_nth != parent_team->t.t_nested_nth) {
2252	KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
2253	KMP_INTERNAL_FREE(team->t.t_nested_nth);
2254	team->t.t_nested_nth = NULL;
2255	}
2256	team->t.t_nested_nth = parent_team->t.t_nested_nth;
2257	if (master_th->th.th_set_nested_nth) {
2258	if (!nested_nth)
2259	nested_nth = __kmp_override_nested_nth(thr: master_th, level);
2260	team->t.t_nested_nth = nested_nth;
2261	KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
2262	master_th->th.th_set_nested_nth = NULL;
2263	master_th->th.th_set_nested_nth_sz = `0`;
2264	master_th->th.th_nt_strict = false;
2265	}
2266
2267	// Update the floating point rounding in the team if required.
2268	propagateFPControl(team);
2269	#if OMPD_SUPPORT
2270	if (ompd_state & OMPD_ENABLE_BP)
2271	ompd_bp_parallel_begin();
2272	#endif
2273
2274	KA_TRACE(
2275	`20`,
2276	("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2277	gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2278	team->t.t_nproc));
2279	KMP_DEBUG_ASSERT(team != root->r.r_hot_team \|\|
2280	(team->t.t_master_tid == `0` &&
2281	(team->t.t_parent == root->r.r_root_team \|\|
2282	team->t.t_parent->t.t_serialized)));
2283	KMP_MB();
2284
2285	/ now, setup the arguments /
2286	argv = (void **)team->t.t_argv;
2287	if (ap) {
2288	for (i = argc - `1`; i >= `0`; --i) {
2289	void new_argv = va_arg(kmp_va_deref(ap), void* *);
2290	KMP_CHECK_UPDATE(*argv, new_argv);
2291	argv++;
2292	}
2293	} else {
2294	for (i = `0`; i < argc; ++i) {
2295	// Get args from parent team for teams construct
2296	KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2297	}
2298	}
2299
2300	/ now actually fork the threads /
2301	KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2302	if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
2303	root->r.r_active = TRUE;
2304
2305	__kmp_fork_team_threads(root, team, master_th, master_gtid: gtid, fork_teams_workers: !ap);
2306	__kmp_setup_icv_copy(team, new_nproc: nthreads,
2307	new_icvs: &master_th->th.th_current_task->td_icvs, loc);
2308
2309	#if OMPT_SUPPORT
2310	master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2311	#endif
2312
2313	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
2314
2315	#if USE_ITT_BUILD
2316	if (team->t.t_active_level == `1` // only report frames at level 1
2317	&& !master_th->th.th_teams_microtask) { // not in teams construct
2318	#if USE_ITT_NOTIFY
2319	if ((__itt_frame_submit_v3_ptr \|\| KMP_ITT_DEBUG) &&
2320	(__kmp_forkjoin_frames_mode == `3` \|\|
2321	__kmp_forkjoin_frames_mode == `1`)) {
2322	kmp_uint64 tmp_time = `0`;
2323	if (__itt_get_timestamp_ptr)
2324	tmp_time = __itt_get_timestamp();
2325	// Internal fork - report frame begin
2326	master_th->th.th_frame_time = tmp_time;
2327	if (__kmp_forkjoin_frames_mode == `3`)
2328	team->t.t_region_time = tmp_time;
2329	} else
2330	// only one notification scheme (either "submit" or "forking/joined", not both)
2331	#endif /* USE_ITT_NOTIFY */
2332	if ((__itt_frame_begin_v3_ptr \|\| KMP_ITT_DEBUG) &&
2333	__kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2334	// Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
2335	__kmp_itt_region_forking(gtid, team_size: team->t.t_nproc, barriers: `0`);
2336	}
2337	}
2338	#endif /* USE_ITT_BUILD */
2339
2340	/ now go on and do the work /
2341	KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2342	KMP_MB();
2343	KF_TRACE(`10`,
2344	("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2345	root, team, master_th, gtid));
2346
2347	#if USE_ITT_BUILD
2348	if (__itt_stack_caller_create_ptr) {
2349	// create new stack stitching id before entering fork barrier
2350	if (!enter_teams) {
2351	KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2352	team->t.t_stack_id = __kmp_itt_stack_caller_create();
2353	} else if (parent_team->t.t_serialized) {
2354	// keep stack stitching id in the serialized parent_team;
2355	// current team will be used for parallel inside the teams;
2356	// if parent_team is active, then it already keeps stack stitching id
2357	// for the league of teams
2358	KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2359	parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2360	}
2361	}
2362	#endif /* USE_ITT_BUILD */
2363
2364	// AC: skip __kmp_internal_fork at teams construct, let only primary
2365	// threads execute
2366	if (ap) {
2367	__kmp_internal_fork(id: loc, gtid, team);
2368	KF_TRACE(`10`, ("__kmp_internal_fork : after : root=%p, team=%p, "
2369	"master_th=%p, gtid=%d\n",
2370	root, team, master_th, gtid));
2371	}
2372
2373	if (call_context == fork_context_gnu) {
2374	KA_TRACE(`20`, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2375	return TRUE;
2376	}
2377
2378	/ Invoke microtask for PRIMARY thread /
2379	KA_TRACE(`20`, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2380	team->t.t_id, team->t.t_pkfn));
2381	} // END of timer KMP_fork_call block
2382
2383	#if KMP_STATS_ENABLED
2384	// If beginning a teams construct, then change thread state
2385	stats_state_e previous_state = KMP_GET_THREAD_STATE();
2386	if (!ap) {
2387	KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2388	}
2389	#endif
2390
2391	if (!team->t.t_invoke(gtid)) {
2392	KMP_ASSERT2(`0`, "cannot invoke microtask for PRIMARY thread");
2393	}
2394
2395	#if KMP_STATS_ENABLED
2396	// If was beginning of a teams construct, then reset thread state
2397	if (!ap) {
2398	KMP_SET_THREAD_STATE(previous_state);
2399	}
2400	#endif
2401
2402	KA_TRACE(`20`, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2403	team->t.t_id, team->t.t_pkfn));
2404	KMP_MB(); / Flush all pending memory write invalidates. /
2405
2406	KA_TRACE(`20`, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2407	#if OMPT_SUPPORT
2408	if (ompt_enabled.enabled) {
2409	master_th->th.ompt_thread_info.state = ompt_state_overhead;
2410	}
2411	#endif
2412
2413	return TRUE;
2414	}
2415
2416	#if OMPT_SUPPORT
2417	static inline void __kmp_join_restore_state(kmp_info_t *thread,
2418	kmp_team_t *team) {
2419	// restore state outside the region
2420	thread->th.ompt_thread_info.state =
2421	((team->t.t_serialized) ? ompt_state_work_serial
2422	: ompt_state_work_parallel);
2423	}
2424
2425	static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2426	kmp_team_t team, ompt_data_t parallel_data,
2427	int flags, void *codeptr) {
2428	ompt_task_info_t *task_info = __ompt_get_task_info_object(depth: `0`);
2429	if (ompt_enabled.ompt_callback_parallel_end) {
2430	ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2431	parallel_data, &(task_info->task_data), flags, codeptr);
2432	}
2433
2434	task_info->frame.enter_frame = ompt_data_none;
2435	__kmp_join_restore_state(thread, team);
2436	}
2437	#endif
2438
2439	void __kmp_join_call(ident_t loc, int* gtid
2440	#if OMPT_SUPPORT
2441	,
2442	enum fork_context_e fork_context
2443	#endif
2444	,
2445	int exit_teams) {
2446	KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2447	kmp_team_t *team;
2448	kmp_team_t *parent_team;
2449	kmp_info_t *master_th;
2450	kmp_root_t *root;
2451	int master_active;
2452
2453	KA_TRACE(`20`, ("__kmp_join_call: enter T#%d\n", gtid));
2454
2455	/ setup current data /
2456	master_th = __kmp_threads[gtid];
2457	root = master_th->th.th_root;
2458	team = master_th->th.th_team;
2459	parent_team = team->t.t_parent;
2460
2461	master_th->th.th_ident = loc;
2462
2463	#if OMPT_SUPPORT
2464	void team_microtask = (void* *)team->t.t_pkfn;
2465	// For GOMP interface with serialized parallel, need the
2466	// __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2467	// and end-parallel events.
2468	if (ompt_enabled.enabled &&
2469	!(team->t.t_serialized && fork_context == fork_context_gnu)) {
2470	master_th->th.ompt_thread_info.state = ompt_state_overhead;
2471	}
2472	#endif
2473
2474	#if KMP_DEBUG
2475	if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2476	KA_TRACE(`20`, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2477	"th_task_team = %p\n",
2478	__kmp_gtid_from_thread(master_th), team,
2479	team->t.t_task_team[master_th->th.th_task_state],
2480	master_th->th.th_task_team));
2481	KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
2482	}
2483	#endif
2484
2485	if (team->t.t_serialized) {
2486	if (master_th->th.th_teams_microtask) {
2487	// We are in teams construct
2488	int level = team->t.t_level;
2489	int tlevel = master_th->th.th_teams_level;
2490	if (level == tlevel) {
2491	// AC: we haven't incremented it earlier at start of teams construct,
2492	// so do it here - at the end of teams construct
2493	team->t.t_level++;
2494	} else if (level == tlevel + `1`) {
2495	// AC: we are exiting parallel inside teams, need to increment
2496	// serialization in order to restore it in the next call to
2497	// __kmpc_end_serialized_parallel
2498	team->t.t_serialized++;
2499	}
2500	}
2501	__kmpc_end_serialized_parallel(loc, global_tid: gtid);
2502
2503	#if OMPT_SUPPORT
2504	if (ompt_enabled.enabled) {
2505	if (fork_context == fork_context_gnu) {
2506	__ompt_lw_taskteam_unlink(thr: master_th);
2507	}
2508	__kmp_join_restore_state(thread: master_th, team: parent_team);
2509	}
2510	#endif
2511
2512	return;
2513	}
2514
2515	master_active = team->t.t_master_active;
2516
2517	if (!exit_teams) {
2518	// AC: No barrier for internal teams at exit from teams construct.
2519	// But there is barrier for external team (league).
2520	__kmp_internal_join(id: loc, gtid, team);
2521	#if USE_ITT_BUILD
2522	if (__itt_stack_caller_create_ptr) {
2523	KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2524	// destroy the stack stitching id after join barrier
2525	__kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2526	team->t.t_stack_id = NULL;
2527	}
2528	#endif
2529	} else {
2530	master_th->th.th_task_state =
2531	`0`; // AC: no tasking in teams (out of any parallel)
2532	#if USE_ITT_BUILD
2533	if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2534	KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2535	// destroy the stack stitching id on exit from the teams construct
2536	// if parent_team is active, then the id will be destroyed later on
2537	// by master of the league of teams
2538	__kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2539	parent_team->t.t_stack_id = NULL;
2540	}
2541	#endif
2542	}
2543
2544	KMP_MB();
2545
2546	#if OMPT_SUPPORT
2547	ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2548	void *codeptr = team->t.ompt_team_info.master_return_address;
2549	#endif
2550
2551	#if USE_ITT_BUILD
2552	// Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
2553	if (team->t.t_active_level == `1` &&
2554	(!master_th->th.th_teams_microtask \|\| / not in teams construct /
2555	master_th->th.th_teams_size.nteams == `1`)) {
2556	master_th->th.th_ident = loc;
2557	// only one notification scheme (either "submit" or "forking/joined", not
2558	// both)
2559	if ((__itt_frame_submit_v3_ptr \|\| KMP_ITT_DEBUG) &&
2560	__kmp_forkjoin_frames_mode == `3`)
2561	__kmp_itt_frame_submit(gtid, begin: team->t.t_region_time,
2562	end: master_th->th.th_frame_time, imbalance: `0`, loc,
2563	team_size: master_th->th.th_team_nproc, region: `1`);
2564	else if ((__itt_frame_end_v3_ptr \|\| KMP_ITT_DEBUG) &&
2565	!__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2566	__kmp_itt_region_joined(gtid);
2567	} // active_level == 1
2568	#endif /* USE_ITT_BUILD */
2569
2570	#if KMP_AFFINITY_SUPPORTED
2571	if (!exit_teams) {
2572	// Restore master thread's partition.
2573	master_th->th.th_first_place = team->t.t_first_place;
2574	master_th->th.th_last_place = team->t.t_last_place;
2575	}
2576	#endif // KMP_AFFINITY_SUPPORTED
2577
2578	if (master_th->th.th_teams_microtask && !exit_teams &&
2579	team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2580	team->t.t_level == master_th->th.th_teams_level + `1`) {
2581	// AC: We need to leave the team structure intact at the end of parallel
2582	// inside the teams construct, so that at the next parallel same (hot) team
2583	// works, only adjust nesting levels
2584	#if OMPT_SUPPORT
2585	ompt_data_t ompt_parallel_data = ompt_data_none;
2586	if (ompt_enabled.enabled) {
2587	ompt_task_info_t *task_info = __ompt_get_task_info_object(depth: `0`);
2588	if (ompt_enabled.ompt_callback_implicit_task) {
2589	int ompt_team_size = team->t.t_nproc;
2590	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2591	ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2592	OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2593	}
2594	task_info->frame.exit_frame = ompt_data_none;
2595	task_info->task_data = ompt_data_none;
2596	ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2597	__ompt_lw_taskteam_unlink(thr: master_th);
2598	}
2599	#endif
2600	/ Decrement our nested depth level /
2601	team->t.t_level--;
2602	team->t.t_active_level--;
2603	KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2604
2605	// Restore number of threads in the team if needed. This code relies on
2606	// the proper adjustment of th_teams_size.nth after the fork in
2607	// __kmp_teams_master on each teams primary thread in the case that
2608	// __kmp_reserve_threads reduced it.
2609	if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2610	int old_num = master_th->th.th_team_nproc;
2611	int new_num = master_th->th.th_teams_size.nth;
2612	kmp_info_t **other_threads = team->t.t_threads;
2613	team->t.t_nproc = new_num;
2614	for (int i = `0`; i < old_num; ++i) {
2615	other_threads[i]->th.th_team_nproc = new_num;
2616	}
2617	// Adjust states of non-used threads of the team
2618	for (int i = old_num; i < new_num; ++i) {
2619	// Re-initialize thread's barrier data.
2620	KMP_DEBUG_ASSERT(other_threads[i]);
2621	kmp_balign_t *balign = other_threads[i]->th.th_bar;
2622	for (int b = `0`; b < bs_last_barrier; ++b) {
2623	balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2624	KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2625	#if USE_DEBUGGER
2626	balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2627	#endif
2628	}
2629	if (__kmp_tasking_mode != tskm_immediate_exec) {
2630	// Synchronize thread's task state
2631	other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2632	}
2633	}
2634	}
2635
2636	#if OMPT_SUPPORT
2637	if (ompt_enabled.enabled) {
2638	__kmp_join_ompt(gtid, thread: master_th, team: parent_team, parallel_data: &ompt_parallel_data,
2639	OMPT_INVOKER(fork_context) \| ompt_parallel_team, codeptr);
2640	}
2641	#endif
2642
2643	return;
2644	}
2645
2646	/ do cleanup and restore the parent team /
2647	master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2648	master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2649
2650	master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2651
2652	/ jc: The following lock has instructions with REL and ACQ semantics,*
2653	separating the parallel user code called in this parallel region
2654	from the serial user code called after this function returns. /*
2655	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
2656
2657	if (!master_th->th.th_teams_microtask \|\|
2658	team->t.t_level > master_th->th.th_teams_level) {
2659	/ Decrement our nested depth level /
2660	KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2661	}
2662	KMP_DEBUG_ASSERT(root->r.r_in_parallel >= `0`);
2663
2664	#if OMPT_SUPPORT
2665	if (ompt_enabled.enabled) {
2666	ompt_task_info_t *task_info = __ompt_get_task_info_object(depth: `0`);
2667	if (ompt_enabled.ompt_callback_implicit_task) {
2668	int flags = (team_microtask == (void *)__kmp_teams_master)
2669	? ompt_task_initial
2670	: ompt_task_implicit;
2671	int ompt_team_size = (flags == ompt_task_initial) ? `0` : team->t.t_nproc;
2672	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2673	ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2674	OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2675	}
2676	task_info->frame.exit_frame = ompt_data_none;
2677	task_info->task_data = ompt_data_none;
2678	}
2679	#endif
2680
2681	KF_TRACE(`10`, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", `0`,
2682	master_th, team));
2683	__kmp_pop_current_task_from_thread(this_thr: master_th);
2684
2685	master_th->th.th_def_allocator = team->t.t_def_allocator;
2686
2687	#if OMPD_SUPPORT
2688	if (ompd_state & OMPD_ENABLE_BP)
2689	ompd_bp_parallel_end();
2690	#endif
2691	updateHWFPControl(team);
2692
2693	if (root->r.r_active != master_active)
2694	root->r.r_active = master_active;
2695
2696	__kmp_free_team(root, team USE_NESTED_HOT_ARG(
2697	master_th)); // this will free worker threads
2698
2699	/ this race was fun to find. make sure the following is in the critical*
2700	region otherwise assertions may fail occasionally since the old team may be
2701	reallocated and the hierarchy appears inconsistent. it is actually safe to
2702	run and won't cause any bugs, but will cause those assertion failures. it's
2703	only one deref&assign so might as well put this in the critical region /*
2704	master_th->th.th_team = parent_team;
2705	master_th->th.th_team_nproc = parent_team->t.t_nproc;
2706	master_th->th.th_team_master = parent_team->t.t_threads[`0`];
2707	master_th->th.th_team_serialized = parent_team->t.t_serialized;
2708
2709	/ restore serialized team, if need be /
2710	if (parent_team->t.t_serialized &&
2711	parent_team != master_th->th.th_serial_team &&
2712	parent_team != root->r.r_root_team) {
2713	__kmp_free_team(root,
2714	master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2715	master_th->th.th_serial_team = parent_team;
2716	}
2717
2718	if (__kmp_tasking_mode != tskm_immediate_exec) {
2719	// Restore primary thread's task state from team structure
2720	KMP_DEBUG_ASSERT(team->t.t_primary_task_state == `0` \|\|
2721	team->t.t_primary_task_state == `1`);
2722	master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2723
2724	// Copy the task team from the parent team to the primary thread
2725	master_th->th.th_task_team =
2726	parent_team->t.t_task_team[master_th->th.th_task_state];
2727	KA_TRACE(`20`,
2728	("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2729	__kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2730	parent_team));
2731	}
2732
2733	// TODO: GEH - cannot do this assertion because root thread not set up as
2734	// executing
2735	// KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2736	master_th->th.th_current_task->td_flags.executing = `1`;
2737
2738	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
2739
2740	#if KMP_AFFINITY_SUPPORTED
2741	if (master_th->th.th_team->t.t_level == `0` && __kmp_affinity.flags.reset) {
2742	__kmp_reset_root_init_mask(gtid);
2743	}
2744	#endif
2745	#if OMPT_SUPPORT
2746	int flags =
2747	OMPT_INVOKER(fork_context) \|
2748	((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2749	: ompt_parallel_team);
2750	if (ompt_enabled.enabled) {
2751	__kmp_join_ompt(gtid, thread: master_th, team: parent_team, parallel_data, flags,
2752	codeptr);
2753	}
2754	#endif
2755
2756	KMP_MB();
2757	KA_TRACE(`20`, ("__kmp_join_call: exit T#%d\n", gtid));
2758	}
2759
2760	/ Check whether we should push an internal control record onto the*
2761	serial team stack. If so, do it. /*
2762	void __kmp_save_internal_controls(kmp_info_t *thread) {
2763
2764	if (thread->th.th_team != thread->th.th_serial_team) {
2765	return;
2766	}
2767	if (thread->th.th_team->t.t_serialized > `1`) {
2768	int push = `0`;
2769
2770	if (thread->th.th_team->t.t_control_stack_top == NULL) {
2771	push = `1`;
2772	} else {
2773	if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2774	thread->th.th_team->t.t_serialized) {
2775	push = `1`;
2776	}
2777	}
2778	if (push) { / push a record on the serial team's stack /
2779	kmp_internal_control_t *control =
2780	(kmp_internal_control_t *)__kmp_allocate(
2781	sizeof(kmp_internal_control_t));
2782
2783	copy_icvs(dst: control, src: &thread->th.th_current_task->td_icvs);
2784
2785	control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2786
2787	control->next = thread->th.th_team->t.t_control_stack_top;
2788	thread->th.th_team->t.t_control_stack_top = control;
2789	}
2790	}
2791	}
2792
2793	/ Changes set_nproc /
2794	void __kmp_set_num_threads(int new_nth, int gtid) {
2795	kmp_info_t *thread;
2796	kmp_root_t *root;
2797
2798	KF_TRACE(`10`, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2799	KMP_DEBUG_ASSERT(__kmp_init_serial);
2800
2801	if (new_nth < `1`)
2802	new_nth = `1`;
2803	else if (new_nth > __kmp_max_nth)
2804	new_nth = __kmp_max_nth;
2805
2806	KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2807	thread = __kmp_threads[gtid];
2808	if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2809	return; // nothing to do
2810
2811	__kmp_save_internal_controls(thread);
2812
2813	set__nproc(thread, new_nth);
2814
2815	// If this omp_set_num_threads() call will cause the hot team size to be
2816	// reduced (in the absence of a num_threads clause), then reduce it now,
2817	// rather than waiting for the next parallel region.
2818	root = thread->th.th_root;
2819	if (__kmp_init_parallel && (!root->r.r_active) &&
2820	(root->r.r_hot_team->t.t_nproc > new_nth)
2821	#if KMP_NESTED_HOT_TEAMS
2822	&& __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2823	#endif
2824	) {
2825	kmp_team_t *hot_team = root->r.r_hot_team;
2826	int f;
2827
2828	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
2829
2830	if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2831	__kmp_resize_dist_barrier(team: hot_team, old_nthreads: hot_team->t.t_nproc, new_nthreads: new_nth);
2832	}
2833	// Release the extra threads we don't need any more.
2834	for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2835	KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2836	if (__kmp_tasking_mode != tskm_immediate_exec) {
2837	// When decreasing team size, threads no longer in the team should unref
2838	// task team.
2839	hot_team->t.t_threads[f]->th.th_task_team = NULL;
2840	}
2841	__kmp_free_thread(hot_team->t.t_threads[f]);
2842	hot_team->t.t_threads[f] = NULL;
2843	}
2844	hot_team->t.t_nproc = new_nth;
2845	#if KMP_NESTED_HOT_TEAMS
2846	if (thread->th.th_hot_teams) {
2847	KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[`0`].hot_team);
2848	thread->th.th_hot_teams[`0`].hot_team_nth = new_nth;
2849	}
2850	#endif
2851
2852	if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2853	hot_team->t.b->update_num_threads(nthr: new_nth);
2854	__kmp_add_threads_to_team(team: hot_team, new_nthreads: new_nth);
2855	}
2856
2857	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
2858
2859	// Update the t_nproc field in the threads that are still active.
2860	for (f = `0`; f < new_nth; f++) {
2861	KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2862	hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2863	}
2864	// Special flag in case omp_set_num_threads() call
2865	hot_team->t.t_size_changed = -`1`;
2866	}
2867	}
2868
2869	/ Changes max_active_levels /
2870	void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2871	kmp_info_t *thread;
2872
2873	KF_TRACE(`10`, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2874	"%d = (%d)\n",
2875	gtid, max_active_levels));
2876	KMP_DEBUG_ASSERT(__kmp_init_serial);
2877
2878	// validate max_active_levels
2879	if (max_active_levels < `0`) {
2880	KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2881	// We ignore this call if the user has specified a negative value.
2882	// The current setting won't be changed. The last valid setting will be
2883	// used. A warning will be issued (if warnings are allowed as controlled by
2884	// the KMP_WARNINGS env var).
2885	KF_TRACE(`10`, ("__kmp_set_max_active_levels: the call is ignored: new "
2886	"max_active_levels for thread %d = (%d)\n",
2887	gtid, max_active_levels));
2888	return;
2889	}
2890	if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2891	// it's OK, the max_active_levels is within the valid range: [ 0;
2892	// KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2893	// We allow a zero value. (implementation defined behavior)
2894	} else {
2895	KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2896	KMP_MAX_ACTIVE_LEVELS_LIMIT);
2897	max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2898	// Current upper limit is MAX_INT. (implementation defined behavior)
2899	// If the input exceeds the upper limit, we correct the input to be the
2900	// upper limit. (implementation defined behavior)
2901	// Actually, the flow should never get here until we use MAX_INT limit.
2902	}
2903	KF_TRACE(`10`, ("__kmp_set_max_active_levels: after validation: new "
2904	"max_active_levels for thread %d = (%d)\n",
2905	gtid, max_active_levels));
2906
2907	thread = __kmp_threads[gtid];
2908
2909	__kmp_save_internal_controls(thread);
2910
2911	set__max_active_levels(thread, max_active_levels);
2912	}
2913
2914	/ Gets max_active_levels /
2915	int __kmp_get_max_active_levels(int gtid) {
2916	kmp_info_t *thread;
2917
2918	KF_TRACE(`10`, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2919	KMP_DEBUG_ASSERT(__kmp_init_serial);
2920
2921	thread = __kmp_threads[gtid];
2922	KMP_DEBUG_ASSERT(thread->th.th_current_task);
2923	KF_TRACE(`10`, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2924	"curtask_maxaclevel=%d\n",
2925	gtid, thread->th.th_current_task,
2926	thread->th.th_current_task->td_icvs.max_active_levels));
2927	return thread->th.th_current_task->td_icvs.max_active_levels;
2928	}
2929
2930	// nteams-var per-device ICV
2931	void __kmp_set_num_teams(int num_teams) {
2932	if (num_teams > `0`)
2933	__kmp_nteams = num_teams;
2934	}
2935	int __kmp_get_max_teams(void) { return __kmp_nteams; }
2936	// teams-thread-limit-var per-device ICV
2937	void __kmp_set_teams_thread_limit(int limit) {
2938	if (limit > `0`)
2939	__kmp_teams_thread_limit = limit;
2940	}
2941	int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
2942
2943	KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2944	KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
2945
2946	/ Changes def_sched_var ICV values (run-time schedule kind and chunk) /
2947	void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2948	kmp_info_t *thread;
2949	kmp_sched_t orig_kind;
2950	// kmp_team_t team;*
2951
2952	KF_TRACE(`10`, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2953	gtid, (int)kind, chunk));
2954	KMP_DEBUG_ASSERT(__kmp_init_serial);
2955
2956	// Check if the kind parameter is valid, correct if needed.
2957	// Valid parameters should fit in one of two intervals - standard or extended:
2958	// <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2959	// 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2960	orig_kind = kind;
2961	kind = __kmp_sched_without_mods(kind);
2962
2963	if (kind <= kmp_sched_lower \|\| kind >= kmp_sched_upper \|\|
2964	(kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2965	// TODO: Hint needs attention in case we change the default schedule.
2966	__kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2967	KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2968	__kmp_msg_null);
2969	kind = kmp_sched_default;
2970	chunk = `0`; // ignore chunk value in case of bad kind
2971	}
2972
2973	thread = __kmp_threads[gtid];
2974
2975	__kmp_save_internal_controls(thread);
2976
2977	if (kind < kmp_sched_upper_std) {
2978	if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2979	// differ static chunked vs. unchunked: chunk should be invalid to
2980	// indicate unchunked schedule (which is the default)
2981	thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2982	} else {
2983	thread->th.th_current_task->td_icvs.sched.r_sched_type =
2984	__kmp_sch_map[kind - kmp_sched_lower - `1`];
2985	}
2986	} else {
2987	// __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2988	// kmp_sched_lower - 2 ];
2989	thread->th.th_current_task->td_icvs.sched.r_sched_type =
2990	__kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2991	kmp_sched_lower - `2`];
2992	}
2993	__kmp_sched_apply_mods_intkind(
2994	kind: orig_kind, internal_kind: &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2995	if (kind == kmp_sched_auto \|\| chunk < `1`) {
2996	// ignore parameter chunk for schedule auto
2997	thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2998	} else {
2999	thread->th.th_current_task->td_icvs.sched.chunk = chunk;
3000	}
3001	}
3002
3003	/ Gets def_sched_var ICV values /
3004	void __kmp_get_schedule(int gtid, kmp_sched_t kind, int* *chunk) {
3005	kmp_info_t *thread;
3006	enum sched_type th_type;
3007
3008	KF_TRACE(`10`, ("__kmp_get_schedule: thread %d\n", gtid));
3009	KMP_DEBUG_ASSERT(__kmp_init_serial);
3010
3011	thread = __kmp_threads[gtid];
3012
3013	th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
3014	switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
3015	case kmp_sch_static:
3016	case kmp_sch_static_greedy:
3017	case kmp_sch_static_balanced:
3018	*kind = kmp_sched_static;
3019	__kmp_sched_apply_mods_stdkind(kind, internal_kind: th_type);
3020	chunk = `0`; // chunk was not set, try to show this fact via zero value*
3021	return;
3022	case kmp_sch_static_chunked:
3023	*kind = kmp_sched_static;
3024	break;
3025	case kmp_sch_dynamic_chunked:
3026	*kind = kmp_sched_dynamic;
3027	break;
3028	case kmp_sch_guided_chunked:
3029	case kmp_sch_guided_iterative_chunked:
3030	case kmp_sch_guided_analytical_chunked:
3031	*kind = kmp_sched_guided;
3032	break;
3033	case kmp_sch_auto:
3034	*kind = kmp_sched_auto;
3035	break;
3036	case kmp_sch_trapezoidal:
3037	*kind = kmp_sched_trapezoidal;
3038	break;
3039	#if KMP_STATIC_STEAL_ENABLED
3040	case kmp_sch_static_steal:
3041	*kind = kmp_sched_static_steal;
3042	break;
3043	#endif
3044	default:
3045	KMP_FATAL(UnknownSchedulingType, th_type);
3046	}
3047
3048	__kmp_sched_apply_mods_stdkind(kind, internal_kind: th_type);
3049	*chunk = thread->th.th_current_task->td_icvs.sched.chunk;
3050	}
3051
3052	int __kmp_get_ancestor_thread_num(int gtid, int level) {
3053
3054	int ii, dd;
3055	kmp_team_t *team;
3056	kmp_info_t *thr;
3057
3058	KF_TRACE(`10`, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3059	KMP_DEBUG_ASSERT(__kmp_init_serial);
3060
3061	// validate level
3062	if (level == `0`)
3063	return `0`;
3064	if (level < `0`)
3065	return -`1`;
3066	thr = __kmp_threads[gtid];
3067	team = thr->th.th_team;
3068	ii = team->t.t_level;
3069	if (level > ii)
3070	return -`1`;
3071
3072	if (thr->th.th_teams_microtask) {
3073	// AC: we are in teams region where multiple nested teams have same level
3074	int tlevel = thr->th.th_teams_level; // the level of the teams construct
3075	if (level <=
3076	tlevel) { // otherwise usual algorithm works (will not touch the teams)
3077	KMP_DEBUG_ASSERT(ii >= tlevel);
3078	// AC: As we need to pass by the teams league, we need to artificially
3079	// increase ii
3080	if (ii == tlevel) {
3081	ii += `2`; // three teams have same level
3082	} else {
3083	ii++; // two teams have same level
3084	}
3085	}
3086	}
3087
3088	if (ii == level)
3089	return __kmp_tid_from_gtid(gtid);
3090
3091	dd = team->t.t_serialized;
3092	level++;
3093	while (ii > level) {
3094	for (dd = team->t.t_serialized; (dd > `0`) && (ii > level); dd--, ii--) {
3095	}
3096	if ((team->t.t_serialized) && (!dd)) {
3097	team = team->t.t_parent;
3098	continue;
3099	}
3100	if (ii > level) {
3101	team = team->t.t_parent;
3102	dd = team->t.t_serialized;
3103	ii--;
3104	}
3105	}
3106
3107	return (dd > `1`) ? (`0`) : (team->t.t_master_tid);
3108	}
3109
3110	int __kmp_get_team_size(int gtid, int level) {
3111
3112	int ii, dd;
3113	kmp_team_t *team;
3114	kmp_info_t *thr;
3115
3116	KF_TRACE(`10`, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3117	KMP_DEBUG_ASSERT(__kmp_init_serial);
3118
3119	// validate level
3120	if (level == `0`)
3121	return `1`;
3122	if (level < `0`)
3123	return -`1`;
3124	thr = __kmp_threads[gtid];
3125	team = thr->th.th_team;
3126	ii = team->t.t_level;
3127	if (level > ii)
3128	return -`1`;
3129
3130	if (thr->th.th_teams_microtask) {
3131	// AC: we are in teams region where multiple nested teams have same level
3132	int tlevel = thr->th.th_teams_level; // the level of the teams construct
3133	if (level <=
3134	tlevel) { // otherwise usual algorithm works (will not touch the teams)
3135	KMP_DEBUG_ASSERT(ii >= tlevel);
3136	// AC: As we need to pass by the teams league, we need to artificially
3137	// increase ii
3138	if (ii == tlevel) {
3139	ii += `2`; // three teams have same level
3140	} else {
3141	ii++; // two teams have same level
3142	}
3143	}
3144	}
3145
3146	while (ii > level) {
3147	for (dd = team->t.t_serialized; (dd > `0`) && (ii > level); dd--, ii--) {
3148	}
3149	if (team->t.t_serialized && (!dd)) {
3150	team = team->t.t_parent;
3151	continue;
3152	}
3153	if (ii > level) {
3154	team = team->t.t_parent;
3155	ii--;
3156	}
3157	}
3158
3159	return team->t.t_nproc;
3160	}
3161
3162	kmp_r_sched_t __kmp_get_schedule_global() {
3163	// This routine created because pairs (__kmp_sched, __kmp_chunk) and
3164	// (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3165	// independently. So one can get the updated schedule here.
3166
3167	kmp_r_sched_t r_sched;
3168
3169	// create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3170	// __kmp_guided. __kmp_sched should keep original value, so that user can set
3171	// KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3172	// different roots (even in OMP 2.5)
3173	enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3174	enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3175	if (s == kmp_sch_static) {
3176	// replace STATIC with more detailed schedule (balanced or greedy)
3177	r_sched.r_sched_type = __kmp_static;
3178	} else if (s == kmp_sch_guided_chunked) {
3179	// replace GUIDED with more detailed schedule (iterative or analytical)
3180	r_sched.r_sched_type = __kmp_guided;
3181	} else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3182	r_sched.r_sched_type = __kmp_sched;
3183	}
3184	SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3185
3186	if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3187	// __kmp_chunk may be wrong here (if it was not ever set)
3188	r_sched.chunk = KMP_DEFAULT_CHUNK;
3189	} else {
3190	r_sched.chunk = __kmp_chunk;
3191	}
3192
3193	return r_sched;
3194	}
3195
3196	/ Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)*
3197	at least argc number of t_argv entries for the requested team. /
3198	static void __kmp_alloc_argv_entries(int argc, kmp_team_t team, int* realloc) {
3199
3200	KMP_DEBUG_ASSERT(team);
3201	if (!realloc \|\| argc > team->t.t_max_argc) {
3202
3203	KA_TRACE(`100`, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3204	"current entries=%d\n",
3205	team->t.t_id, argc, (realloc) ? team->t.t_max_argc : `0`));
3206	/ if previously allocated heap space for args, free them /
3207	if (realloc && team->t.t_argv != &team->t.t_inline_argv[`0`])
3208	__kmp_free((void *)team->t.t_argv);
3209
3210	if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3211	/ use unused space in the cache line for arguments /
3212	team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3213	KA_TRACE(`100`, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3214	"argv entries\n",
3215	team->t.t_id, team->t.t_max_argc));
3216	team->t.t_argv = &team->t.t_inline_argv[`0`];
3217	if (__kmp_storage_map) {
3218	__kmp_print_storage_map_gtid(
3219	gtid: -`1`, p1: &team->t.t_inline_argv[`0`],
3220	p2: &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3221	size: (sizeof(void ) KMP_INLINE_ARGV_ENTRIES), format: "team_%d.t_inline_argv",
3222	team->t.t_id);
3223	}
3224	} else {
3225	/ allocate space for arguments in the heap /
3226	team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> `1`))
3227	? KMP_MIN_MALLOC_ARGV_ENTRIES
3228	: `2` * argc;
3229	KA_TRACE(`100`, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3230	"argv entries\n",
3231	team->t.t_id, team->t.t_max_argc));
3232	team->t.t_argv =
3233	(void )__kmp_page_allocate(sizeof*(void* ) team->t.t_max_argc);
3234	if (__kmp_storage_map) {
3235	__kmp_print_storage_map_gtid(gtid: -`1`, p1: &team->t.t_argv[`0`],
3236	p2: &team->t.t_argv[team->t.t_max_argc],
3237	size: sizeof(void ) team->t.t_max_argc,
3238	format: "team_%d.t_argv", team->t.t_id);
3239	}
3240	}
3241	}
3242	}
3243
3244	static void __kmp_allocate_team_arrays(kmp_team_t team, int* max_nth) {
3245	int i;
3246	int num_disp_buff = max_nth > `1` ? __kmp_dispatch_num_buffers : `2`;
3247	team->t.t_threads =
3248	(kmp_info_t )__kmp_allocate(sizeof*(kmp_info_t ) * max_nth);
3249	team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3250	sizeof(dispatch_shared_info_t) * num_disp_buff);
3251	team->t.t_dispatch =
3252	(kmp_disp_t )__kmp_allocate(sizeof(kmp_disp_t) max_nth);
3253	team->t.t_implicit_task_taskdata =
3254	(kmp_taskdata_t )__kmp_allocate(sizeof(kmp_taskdata_t) max_nth);
3255	team->t.t_max_nproc = max_nth;
3256
3257	/ setup dispatch buffers /
3258	for (i = `0`; i < num_disp_buff; ++i) {
3259	team->t.t_disp_buffer[i].buffer_index = i;
3260	team->t.t_disp_buffer[i].doacross_buf_idx = i;
3261	}
3262	}
3263
3264	static void __kmp_free_team_arrays(kmp_team_t *team) {
3265	/ Note: this does not free the threads in t_threads (__kmp_free_threads) /
3266	int i;
3267	for (i = `0`; i < team->t.t_max_nproc; ++i) {
3268	if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3269	__kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3270	team->t.t_dispatch[i].th_disp_buffer = NULL;
3271	}
3272	}
3273	#if KMP_USE_HIER_SCHED
3274	__kmp_dispatch_free_hierarchies(team);
3275	#endif
3276	__kmp_free(team->t.t_threads);
3277	__kmp_free(team->t.t_disp_buffer);
3278	__kmp_free(team->t.t_dispatch);
3279	__kmp_free(team->t.t_implicit_task_taskdata);
3280	team->t.t_threads = NULL;
3281	team->t.t_disp_buffer = NULL;
3282	team->t.t_dispatch = NULL;
3283	team->t.t_implicit_task_taskdata = `0`;
3284	}
3285
3286	static void __kmp_reallocate_team_arrays(kmp_team_t team, int* max_nth) {
3287	kmp_info_t **oldThreads = team->t.t_threads;
3288
3289	__kmp_free(team->t.t_disp_buffer);
3290	__kmp_free(team->t.t_dispatch);
3291	__kmp_free(team->t.t_implicit_task_taskdata);
3292	__kmp_allocate_team_arrays(team, max_nth);
3293
3294	KMP_MEMCPY(dest: team->t.t_threads, src: oldThreads,
3295	n: team->t.t_nproc * sizeof(kmp_info_t *));
3296
3297	__kmp_free(oldThreads);
3298	}
3299
3300	static kmp_internal_control_t __kmp_get_global_icvs(void) {
3301
3302	kmp_r_sched_t r_sched =
3303	__kmp_get_schedule_global(); // get current state of scheduling globals
3304
3305	KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > `0`);
3306
3307	kmp_internal_control_t g_icvs = {
3308	.serial_nesting_level: `0`, // int serial_nesting_level; //corresponds to value of th_team_serialized
3309	.dynamic: (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3310	// adjustment of threads (per thread)
3311	.bt_set: (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3312	// whether blocktime is explicitly set
3313	.blocktime: __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
3314	#if KMP_USE_MONITOR
3315	__kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3316	// intervals
3317	#endif
3318	.nproc: __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3319	// next parallel region (per thread)
3320	// (use a max ub on value if __kmp_parallel_initialize not called yet)
3321	.thread_limit: __kmp_cg_max_nth, // int thread_limit;
3322	.task_thread_limit: __kmp_task_max_nth, // int task_thread_limit; // to set the thread_limit
3323	// on task. This is used in the case of target thread_limit
3324	.max_active_levels: __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3325	// for max_active_levels
3326	.sched: r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3327	// {sched,chunk} pair
3328	.proc_bind: __kmp_nested_proc_bind.bind_types[`0`],
3329	.default_device: __kmp_default_device,
3330	NULL // struct kmp_internal_control next;*
3331	};
3332
3333	return g_icvs;
3334	}
3335
3336	static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3337
3338	kmp_internal_control_t gx_icvs;
3339	gx_icvs.serial_nesting_level =
3340	`0`; // probably =team->t.t_serial like in save_inter_controls
3341	copy_icvs(dst: &gx_icvs, src: &team->t.t_threads[`0`]->th.th_current_task->td_icvs);
3342	gx_icvs.next = NULL;
3343
3344	return gx_icvs;
3345	}
3346
3347	static void __kmp_initialize_root(kmp_root_t *root) {
3348	int f;
3349	kmp_team_t *root_team;
3350	kmp_team_t *hot_team;
3351	int hot_team_max_nth;
3352	kmp_r_sched_t r_sched =
3353	__kmp_get_schedule_global(); // get current state of scheduling globals
3354	kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3355	KMP_DEBUG_ASSERT(root);
3356	KMP_ASSERT(!root->r.r_begin);
3357
3358	/ setup the root state structure /
3359	__kmp_init_lock(lck: &root->r.r_begin_lock);
3360	root->r.r_begin = FALSE;
3361	root->r.r_active = FALSE;
3362	root->r.r_in_parallel = `0`;
3363	root->r.r_blocktime = __kmp_dflt_blocktime;
3364	#if KMP_AFFINITY_SUPPORTED
3365	root->r.r_affinity_assigned = FALSE;
3366	#endif
3367
3368	/ setup the root team for this task /
3369	/ allocate the root team structure /
3370	KF_TRACE(`10`, ("__kmp_initialize_root: before root_team\n"));
3371
3372	root_team =
3373	__kmp_allocate_team(root,
3374	new_nproc: `1`, // new_nproc
3375	max_nproc: `1`, // max_nproc
3376	#if OMPT_SUPPORT
3377	ompt_data_none, // root parallel id
3378	#endif
3379	proc_bind: __kmp_nested_proc_bind.bind_types[`0`], new_icvs: &r_icvs,
3380	argc: `0` // argc
3381	USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3382	);
3383	#if USE_DEBUGGER
3384	// Non-NULL value should be assigned to make the debugger display the root
3385	// team.
3386	TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~`0`));
3387	#endif
3388
3389	KF_TRACE(`10`, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3390
3391	root->r.r_root_team = root_team;
3392	root_team->t.t_control_stack_top = NULL;
3393
3394	/ initialize root team /
3395	root_team->t.t_threads[`0`] = NULL;
3396	root_team->t.t_nproc = `1`;
3397	root_team->t.t_serialized = `1`;
3398	// TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3399	root_team->t.t_sched.sched = r_sched.sched;
3400	root_team->t.t_nested_nth = &__kmp_nested_nth;
3401	KA_TRACE(
3402	`20`,
3403	("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3404	root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3405
3406	/ setup the hot team for this task /
3407	/ allocate the hot team structure /
3408	KF_TRACE(`10`, ("__kmp_initialize_root: before hot_team\n"));
3409
3410	hot_team =
3411	__kmp_allocate_team(root,
3412	new_nproc: `1`, // new_nproc
3413	max_nproc: __kmp_dflt_team_nth_ub * `2`, // max_nproc
3414	#if OMPT_SUPPORT
3415	ompt_data_none, // root parallel id
3416	#endif
3417	proc_bind: __kmp_nested_proc_bind.bind_types[`0`], new_icvs: &r_icvs,
3418	argc: `0` // argc
3419	USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
3420	);
3421	KF_TRACE(`10`, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3422
3423	root->r.r_hot_team = hot_team;
3424	root_team->t.t_control_stack_top = NULL;
3425
3426	/ first-time initialization /
3427	hot_team->t.t_parent = root_team;
3428
3429	/ initialize hot team /
3430	hot_team_max_nth = hot_team->t.t_max_nproc;
3431	for (f = `0`; f < hot_team_max_nth; ++f) {
3432	hot_team->t.t_threads[f] = NULL;
3433	}
3434	hot_team->t.t_nproc = `1`;
3435	// TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3436	hot_team->t.t_sched.sched = r_sched.sched;
3437	hot_team->t.t_size_changed = `0`;
3438	hot_team->t.t_nested_nth = &__kmp_nested_nth;
3439	}
3440
3441	#ifdef KMP_DEBUG
3442
3443	typedef struct kmp_team_list_item {
3444	kmp_team_p const *entry;
3445	struct kmp_team_list_item *next;
3446	} kmp_team_list_item_t;
3447	typedef kmp_team_list_item_t *kmp_team_list_t;
3448
3449	static void __kmp_print_structure_team_accum( // Add team to list of teams.
3450	kmp_team_list_t list, // List of teams.
3451	kmp_team_p const team // Team to add.*
3452	) {
3453
3454	// List must terminate with item where both entry and next are NULL.
3455	// Team is added to the list only once.
3456	// List is sorted in ascending order by team id.
3457	// Team id is not* a key.*
3458
3459	kmp_team_list_t l;
3460
3461	KMP_DEBUG_ASSERT(list != NULL);
3462	if (team == NULL) {
3463	return;
3464	}
3465
3466	__kmp_print_structure_team_accum(list, team: team->t.t_parent);
3467	__kmp_print_structure_team_accum(list, team: team->t.t_next_pool);
3468
3469	// Search list for the team.
3470	l = list;
3471	while (l->next != NULL && l->entry != team) {
3472	l = l->next;
3473	}
3474	if (l->next != NULL) {
3475	return; // Team has been added before, exit.
3476	}
3477
3478	// Team is not found. Search list again for insertion point.
3479	l = list;
3480	while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3481	l = l->next;
3482	}
3483
3484	// Insert team.
3485	{
3486	kmp_team_list_item_t item = (kmp_team_list_item_t )KMP_INTERNAL_MALLOC(
3487	sizeof(kmp_team_list_item_t));
3488	item = l;
3489	l->entry = team;
3490	l->next = item;
3491	}
3492	}
3493
3494	static void __kmp_print_structure_team(char const title, kmp_team_p const* *team
3495
3496	) {
3497	__kmp_printf(format: "%s", title);
3498	if (team != NULL) {
3499	__kmp_printf(format: "%2x %p\n", team->t.t_id, team);
3500	} else {
3501	__kmp_printf(format: " - (nil)\n");
3502	}
3503	}
3504
3505	static void __kmp_print_structure_thread(char const *title,
3506	kmp_info_p const *thread) {
3507	__kmp_printf(format: "%s", title);
3508	if (thread != NULL) {
3509	__kmp_printf(format: "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3510	} else {
3511	__kmp_printf(format: " - (nil)\n");
3512	}
3513	}
3514
3515	void __kmp_print_structure(void) {
3516
3517	kmp_team_list_t list;
3518
3519	// Initialize list of teams.
3520	list =
3521	(kmp_team_list_item_t )KMP_INTERNAL_MALLOC(sizeof*(kmp_team_list_item_t));
3522	list->entry = NULL;
3523	list->next = NULL;
3524
3525	__kmp_printf(format: "\n------------------------------\nGlobal Thread "
3526	"Table\n------------------------------\n");
3527	{
3528	int gtid;
3529	for (gtid = `0`; gtid < __kmp_threads_capacity; ++gtid) {
3530	__kmp_printf(format: "%2d", gtid);
3531	if (__kmp_threads != NULL) {
3532	__kmp_printf(format: " %p", __kmp_threads[gtid]);
3533	}
3534	if (__kmp_root != NULL) {
3535	__kmp_printf(format: " %p", __kmp_root[gtid]);
3536	}
3537	__kmp_printf(format: "\n");
3538	}
3539	}
3540
3541	// Print out __kmp_threads array.
3542	__kmp_printf(format: "\n------------------------------\nThreads\n--------------------"
3543	"----------\n");
3544	if (__kmp_threads != NULL) {
3545	int gtid;
3546	for (gtid = `0`; gtid < __kmp_threads_capacity; ++gtid) {
3547	kmp_info_t const *thread = __kmp_threads[gtid];
3548	if (thread != NULL) {
3549	__kmp_printf(format: "GTID %2d %p:\n", gtid, thread);
3550	__kmp_printf(format: " Our Root: %p\n", thread->th.th_root);
3551	__kmp_print_structure_team(title: " Our Team: ", team: thread->th.th_team);
3552	__kmp_print_structure_team(title: " Serial Team: ",
3553	team: thread->th.th_serial_team);
3554	__kmp_printf(format: " Threads: %2d\n", thread->th.th_team_nproc);
3555	__kmp_print_structure_thread(title: " Primary: ",
3556	thread: thread->th.th_team_master);
3557	__kmp_printf(format: " Serialized?: %2d\n", thread->th.th_team_serialized);
3558	__kmp_printf(format: " Set NProc: %2d\n", thread->th.th_set_nproc);
3559	__kmp_printf(format: " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3560	__kmp_print_structure_thread(title: " Next in pool: ",
3561	thread: thread->th.th_next_pool);
3562	__kmp_printf(format: "\n");
3563	__kmp_print_structure_team_accum(list, team: thread->th.th_team);
3564	__kmp_print_structure_team_accum(list, team: thread->th.th_serial_team);
3565	}
3566	}
3567	} else {
3568	__kmp_printf(format: "Threads array is not allocated.\n");
3569	}
3570
3571	// Print out __kmp_root array.
3572	__kmp_printf(format: "\n------------------------------\nUbers\n----------------------"
3573	"--------\n");
3574	if (__kmp_root != NULL) {
3575	int gtid;
3576	for (gtid = `0`; gtid < __kmp_threads_capacity; ++gtid) {
3577	kmp_root_t const *root = __kmp_root[gtid];
3578	if (root != NULL) {
3579	__kmp_printf(format: "GTID %2d %p:\n", gtid, root);
3580	__kmp_print_structure_team(title: " Root Team: ", team: root->r.r_root_team);
3581	__kmp_print_structure_team(title: " Hot Team: ", team: root->r.r_hot_team);
3582	__kmp_print_structure_thread(title: " Uber Thread: ",
3583	thread: root->r.r_uber_thread);
3584	__kmp_printf(format: " Active?: %2d\n", root->r.r_active);
3585	__kmp_printf(format: " In Parallel: %2d\n",
3586	KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3587	__kmp_printf(format: "\n");
3588	__kmp_print_structure_team_accum(list, team: root->r.r_root_team);
3589	__kmp_print_structure_team_accum(list, team: root->r.r_hot_team);
3590	}
3591	}
3592	} else {
3593	__kmp_printf(format: "Ubers array is not allocated.\n");
3594	}
3595
3596	__kmp_printf(format: "\n------------------------------\nTeams\n----------------------"
3597	"--------\n");
3598	while (list->next != NULL) {
3599	kmp_team_p const *team = list->entry;
3600	int i;
3601	__kmp_printf(format: "Team %2x %p:\n", team->t.t_id, team);
3602	__kmp_print_structure_team(title: " Parent Team: ", team: team->t.t_parent);
3603	__kmp_printf(format: " Primary TID: %2d\n", team->t.t_master_tid);
3604	__kmp_printf(format: " Max threads: %2d\n", team->t.t_max_nproc);
3605	__kmp_printf(format: " Levels of serial: %2d\n", team->t.t_serialized);
3606	__kmp_printf(format: " Number threads: %2d\n", team->t.t_nproc);
3607	for (i = `0`; i < team->t.t_nproc; ++i) {
3608	__kmp_printf(format: " Thread %2d: ", i);
3609	__kmp_print_structure_thread(title: "", thread: team->t.t_threads[i]);
3610	}
3611	__kmp_print_structure_team(title: " Next in pool: ", team: team->t.t_next_pool);
3612	__kmp_printf(format: "\n");
3613	list = list->next;
3614	}
3615
3616	// Print out __kmp_thread_pool and __kmp_team_pool.
3617	__kmp_printf(format: "\n------------------------------\nPools\n----------------------"
3618	"--------\n");
3619	__kmp_print_structure_thread(title: "Thread pool: ",
3620	CCAST(kmp_info_t *, __kmp_thread_pool));
3621	__kmp_print_structure_team(title: "Team pool: ",
3622	CCAST(kmp_team_t *, __kmp_team_pool));
3623	__kmp_printf(format: "\n");
3624
3625	// Free team list.
3626	while (list != NULL) {
3627	kmp_team_list_item_t *item = list;
3628	list = list->next;
3629	KMP_INTERNAL_FREE(item);
3630	}
3631	}
3632
3633	#endif
3634
3635	//---------------------------------------------------------------------------
3636	// Stuff for per-thread fast random number generator
3637	// Table of primes
3638	static const unsigned __kmp_primes[] = {
3639	`0x9e3779b1`, `0xffe6cc59`, `0x2109f6dd`, `0x43977ab5`, `0xba5703f5`, `0xb495a877`,
3640	`0xe1626741`, `0x79695e6b`, `0xbc98c09f`, `0xd5bee2b3`, `0x287488f9`, `0x3af18231`,
3641	`0x9677cd4d`, `0xbe3a6929`, `0xadc6a877`, `0xdcf0674b`, `0xbe4d6fe9`, `0x5f15e201`,
3642	`0x99afc3fd`, `0xf3f16801`, `0xe222cfff`, `0x24ba5fdb`, `0x0620452d`, `0x79f149e3`,
3643	`0xc8b93f49`, `0x972702cd`, `0xb07dd827`, `0x6c97d5ed`, `0x085a3d61`, `0x46eb5ea7`,
3644	`0x3d9910ed`, `0x2e687b5b`, `0x29609227`, `0x6eb081f1`, `0x0954c4e1`, `0x9d114db9`,
3645	`0x542acfa9`, `0xb3e6bd7b`, `0x0742d917`, `0xe9f3ffa7`, `0x54581edb`, `0xf2480f45`,
3646	`0x0bb9288f`, `0xef1affc7`, `0x85fa0ca7`, `0x3ccc14db`, `0xe6baf34b`, `0x343377f7`,
3647	`0x5ca19031`, `0xe6d9293b`, `0xf0a9f391`, `0x5d2e980b`, `0xfc411073`, `0xc3749363`,
3648	`0xb892d829`, `0x3549366b`, `0x629750ad`, `0xb98294e5`, `0x892d9483`, `0xc235baf3`,
3649	`0x3d2402a3`, `0x6bdef3c9`, `0xbec333cd`, `0x40c9520f`};
3650
3651	//---------------------------------------------------------------------------
3652	// __kmp_get_random: Get a random number using a linear congruential method.
3653	unsigned short __kmp_get_random(kmp_info_t *thread) {
3654	unsigned x = thread->th.th_x;
3655	unsigned short r = (unsigned short)(x >> `16`);
3656
3657	thread->th.th_x = x * thread->th.th_a + `1`;
3658
3659	KA_TRACE(`30`, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3660	thread->th.th_info.ds.ds_tid, r));
3661
3662	return r;
3663	}
3664	//--------------------------------------------------------
3665	// __kmp_init_random: Initialize a random number generator
3666	void __kmp_init_random(kmp_info_t *thread) {
3667	unsigned seed = thread->th.th_info.ds.ds_tid;
3668
3669	thread->th.th_a =
3670	__kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[`0`]))];
3671	thread->th.th_x = (seed + `1`) * thread->th.th_a + `1`;
3672	KA_TRACE(`30`,
3673	("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3674	}
3675
3676	#if KMP_OS_WINDOWS
3677	/ reclaim array entries for root threads that are already dead, returns number*
3678	* reclaimed */
3679	static int __kmp_reclaim_dead_roots(void) {
3680	int i, r = `0`;
3681
3682	for (i = `0`; i < __kmp_threads_capacity; ++i) {
3683	if (KMP_UBER_GTID(i) &&
3684	!__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3685	!__kmp_root[i]
3686	->r.r_active) { // AC: reclaim only roots died in non-active state
3687	r += __kmp_unregister_root_other_thread(i);
3688	}
3689	}
3690	return r;
3691	}
3692	#endif
3693
3694	/ This function attempts to create free entries in __kmp_threads and*
3695	__kmp_root, and returns the number of free entries generated.
3696
3697	For Windows OS static library, the first mechanism used is to reclaim array*
3698	entries for root threads that are already dead.
3699
3700	On all platforms, expansion is attempted on the arrays __kmp_threads_ and
3701	__kmp_root, with appropriate update to __kmp_threads_capacity. Array
3702	capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3703	threadprivate cache array has been created. Synchronization with
3704	__kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
3705
3706	After any dead root reclamation, if the clipping value allows array expansion
3707	to result in the generation of a total of nNeed free slots, the function does
3708	that expansion. If not, nothing is done beyond the possible initial root
3709	thread reclamation.
3710
3711	If any argument is negative, the behavior is undefined. /*
3712	static int __kmp_expand_threads(int nNeed) {
3713	int added = `0`;
3714	int minimumRequiredCapacity;
3715	int newCapacity;
3716	kmp_info_t **newThreads;
3717	kmp_root_t **newRoot;
3718
3719	// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3720	// resizing __kmp_threads does not need additional protection if foreign
3721	// threads are present
3722
3723	#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3724	/ only for Windows static library /
3725	/ reclaim array entries for root threads that are already dead /
3726	added = __kmp_reclaim_dead_roots();
3727
3728	if (nNeed) {
3729	nNeed -= added;
3730	if (nNeed < `0`)
3731	nNeed = `0`;
3732	}
3733	#endif
3734	if (nNeed <= `0`)
3735	return added;
3736
3737	// Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3738	// __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3739	// user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3740	// > __kmp_max_nth in one of two ways:
3741	//
3742	// 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3743	// may not be reused by another thread, so we may need to increase
3744	// __kmp_threads_capacity to __kmp_max_nth + 1.
3745	//
3746	// 2) New foreign root(s) are encountered. We always register new foreign
3747	// roots. This may cause a smaller # of threads to be allocated at
3748	// subsequent parallel regions, but the worker threads hang around (and
3749	// eventually go to sleep) and need slots in the __kmp_threads[] array.
3750	//
3751	// Anyway, that is the reason for moving the check to see if
3752	// __kmp_max_nth was exceeded into __kmp_reserve_threads()
3753	// instead of having it performed here. -BB
3754
3755	KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3756
3757	/ compute expansion headroom to check if we can expand /
3758	if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3759	/ possible expansion too small -- give up /
3760	return added;
3761	}
3762	minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3763
3764	newCapacity = __kmp_threads_capacity;
3765	do {
3766	newCapacity = newCapacity <= (__kmp_sys_max_nth >> `1`) ? (newCapacity << `1`)
3767	: __kmp_sys_max_nth;
3768	} while (newCapacity < minimumRequiredCapacity);
3769	newThreads = (kmp_info_t **)__kmp_allocate(
3770	(sizeof(kmp_info_t ) + sizeof(kmp_root_t )) * newCapacity + CACHE_LINE);
3771	newRoot =
3772	(kmp_root_t *)((char* )newThreads + sizeof(kmp_info_t ) * newCapacity);
3773	KMP_MEMCPY(dest: newThreads, src: __kmp_threads,
3774	n: __kmp_threads_capacity * sizeof(kmp_info_t *));
3775	KMP_MEMCPY(dest: newRoot, src: __kmp_root,
3776	n: __kmp_threads_capacity * sizeof(kmp_root_t *));
3777	// Put old __kmp_threads array on a list. Any ongoing references to the old
3778	// list will be valid. This list is cleaned up at library shutdown.
3779	kmp_old_threads_list_t *node =
3780	(kmp_old_threads_list_t )__kmp_allocate(sizeof*(kmp_old_threads_list_t));
3781	node->threads = __kmp_threads;
3782	node->next = __kmp_old_threads_list;
3783	__kmp_old_threads_list = node;
3784
3785	(kmp_info_t *volatile *)&__kmp_threads = newThreads;
3786	(kmp_root_t *volatile *)&__kmp_root = newRoot;
3787	added += newCapacity - __kmp_threads_capacity;
3788	(volatile* int *)&__kmp_threads_capacity = newCapacity;
3789
3790	if (newCapacity > __kmp_tp_capacity) {
3791	__kmp_acquire_bootstrap_lock(lck: &__kmp_tp_cached_lock);
3792	if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3793	__kmp_threadprivate_resize_cache(newCapacity);
3794	} else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3795	(volatile* int *)&__kmp_tp_capacity = newCapacity;
3796	}
3797	__kmp_release_bootstrap_lock(lck: &__kmp_tp_cached_lock);
3798	}
3799
3800	return added;
3801	}
3802
3803	/ Register the current thread as a root thread and obtain our gtid. We must*
3804	have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3805	thread that calls from __kmp_do_serial_initialize() /*
3806	int __kmp_register_root(int initial_thread) {
3807	kmp_info_t *root_thread;
3808	kmp_root_t *root;
3809	int gtid;
3810	int capacity;
3811	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
3812	KA_TRACE(`20`, ("__kmp_register_root: entered\n"));
3813	KMP_MB();
3814
3815	/ 2007-03-02:*
3816	If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3817	initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3818	work as expected -- it may return false (that means there is at least one
3819	empty slot in __kmp_threads array), but it is possible the only free slot
3820	is #0, which is reserved for initial thread and so cannot be used for this
3821	one. Following code workarounds this bug.
3822
3823	However, right solution seems to be not reserving slot #0 for initial
3824	thread because:
3825	(1) there is no magic in slot #0,
3826	(2) we cannot detect initial thread reliably (the first thread which does
3827	serial initialization may be not a real initial thread).
3828	*/
3829	capacity = __kmp_threads_capacity;
3830	if (!initial_thread && TCR_PTR(__kmp_threads[`0`]) == NULL) {
3831	--capacity;
3832	}
3833
3834	// If it is not for initializing the hidden helper team, we need to take
3835	// __kmp_hidden_helper_threads_num out of the capacity because it is included
3836	// in __kmp_threads_capacity.
3837	if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3838	capacity -= __kmp_hidden_helper_threads_num;
3839	}
3840
3841	/ see if there are too many threads /
3842	if (__kmp_all_nth >= capacity && !__kmp_expand_threads(nNeed: `1`)) {
3843	if (__kmp_tp_cached) {
3844	__kmp_fatal(KMP_MSG(CantRegisterNewThread),
3845	KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3846	KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3847	} else {
3848	__kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3849	__kmp_msg_null);
3850	}
3851	}
3852
3853	// When hidden helper task is enabled, __kmp_threads is organized as follows:
3854	// 0: initial thread, also a regular OpenMP thread.
3855	// [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
3856	// [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
3857	// regular OpenMP threads.
3858	if (TCR_4(__kmp_init_hidden_helper_threads)) {
3859	// Find an available thread slot for hidden helper thread. Slots for hidden
3860	// helper threads start from 1 to __kmp_hidden_helper_threads_num.
3861	for (gtid = `1`; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3862	gtid <= __kmp_hidden_helper_threads_num;
3863	gtid++)
3864	;
3865	KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3866	KA_TRACE(`1`, ("__kmp_register_root: found slot in threads array for "
3867	"hidden helper thread: T#%d\n",
3868	gtid));
3869	} else {
3870	/ find an available thread slot /
3871	// Don't reassign the zero slot since we need that to only be used by
3872	// initial thread. Slots for hidden helper threads should also be skipped.
3873	if (initial_thread && TCR_PTR(__kmp_threads[`0`]) == NULL) {
3874	gtid = `0`;
3875	} else {
3876	for (gtid = __kmp_hidden_helper_threads_num + `1`;
3877	TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3878	;
3879	}
3880	KA_TRACE(
3881	`1`, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3882	KMP_ASSERT(gtid < __kmp_threads_capacity);
3883	}
3884
3885	/ update global accounting /
3886	__kmp_all_nth++;
3887	TCW_4(__kmp_nth, __kmp_nth + `1`);
3888
3889	// if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3890	// numbers of procs, and method #2 (keyed API call) for higher numbers.
3891	if (__kmp_adjust_gtid_mode) {
3892	if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3893	if (TCR_4(__kmp_gtid_mode) != `2`) {
3894	TCW_4(__kmp_gtid_mode, `2`);
3895	}
3896	} else {
3897	if (TCR_4(__kmp_gtid_mode) != `1`) {
3898	TCW_4(__kmp_gtid_mode, `1`);
3899	}
3900	}
3901	}
3902
3903	#ifdef KMP_ADJUST_BLOCKTIME
3904	/ Adjust blocktime to zero if necessary /
3905	/ Middle initialization might not have occurred yet /
3906	if (!__kmp_env_blocktime && (__kmp_avail_proc > `0`)) {
3907	if (__kmp_nth > __kmp_avail_proc) {
3908	__kmp_zero_bt = TRUE;
3909	}
3910	}
3911	#endif /* KMP_ADJUST_BLOCKTIME */
3912
3913	/ setup this new hierarchy /
3914	if (!(root = __kmp_root[gtid])) {
3915	root = __kmp_root[gtid] = (kmp_root_t )__kmp_allocate(sizeof*(kmp_root_t));
3916	KMP_DEBUG_ASSERT(!root->r.r_root_team);
3917	}
3918
3919	#if KMP_STATS_ENABLED
3920	// Initialize stats as soon as possible (right after gtid assignment).
3921	__kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3922	__kmp_stats_thread_ptr->startLife();
3923	KMP_SET_THREAD_STATE(SERIAL_REGION);
3924	KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3925	#endif
3926	__kmp_initialize_root(root);
3927
3928	/ setup new root thread structure /
3929	if (root->r.r_uber_thread) {
3930	root_thread = root->r.r_uber_thread;
3931	} else {
3932	root_thread = (kmp_info_t )__kmp_allocate(sizeof*(kmp_info_t));
3933	if (__kmp_storage_map) {
3934	__kmp_print_thread_storage_map(thr: root_thread, gtid);
3935	}
3936	root_thread->th.th_info.ds.ds_gtid = gtid;
3937	#if OMPT_SUPPORT
3938	root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3939	#endif
3940	root_thread->th.th_root = root;
3941	if (__kmp_env_consistency_check) {
3942	root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3943	}
3944	#if USE_FAST_MEMORY
3945	__kmp_initialize_fast_memory(this_thr: root_thread);
3946	#endif /* USE_FAST_MEMORY */
3947
3948	#if KMP_USE_BGET
3949	KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3950	__kmp_initialize_bget(th: root_thread);
3951	#endif
3952	__kmp_init_random(thread: root_thread); // Initialize random number generator
3953	}
3954
3955	/ setup the serial team held in reserve by the root thread /
3956	if (!root_thread->th.th_serial_team) {
3957	kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3958	KF_TRACE(`10`, ("__kmp_register_root: before serial_team\n"));
3959	root_thread->th.th_serial_team = __kmp_allocate_team(
3960	root, new_nproc: `1`, max_nproc: `1`,
3961	#if OMPT_SUPPORT
3962	ompt_data_none, // root parallel id
3963	#endif
3964	proc_bind: proc_bind_default, new_icvs: &r_icvs, argc: `0` USE_NESTED_HOT_ARG(NULL));
3965	}
3966	KMP_ASSERT(root_thread->th.th_serial_team);
3967	KF_TRACE(`10`, ("__kmp_register_root: after serial_team = %p\n",
3968	root_thread->th.th_serial_team));
3969
3970	/ drop root_thread into place /
3971	TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3972
3973	root->r.r_root_team->t.t_threads[`0`] = root_thread;
3974	root->r.r_hot_team->t.t_threads[`0`] = root_thread;
3975	root_thread->th.th_serial_team->t.t_threads[`0`] = root_thread;
3976	// AC: the team created in reserve, not for execution (it is unused for now).
3977	root_thread->th.th_serial_team->t.t_serialized = `0`;
3978	root->r.r_uber_thread = root_thread;
3979
3980	/ initialize the thread, get it ready to go /
3981	__kmp_initialize_info(root_thread, root->r.r_root_team, tid: `0`, gtid);
3982	TCW_4(__kmp_init_gtid, TRUE);
3983
3984	/ prepare the primary thread for get_gtid() /
3985	__kmp_gtid_set_specific(gtid);
3986
3987	#if USE_ITT_BUILD
3988	__kmp_itt_thread_name(gtid);
3989	#endif /* USE_ITT_BUILD */
3990
3991	#ifdef KMP_TDATA_GTID
3992	__kmp_gtid = gtid;
3993	#endif
3994	__kmp_create_worker(gtid, th: root_thread, stack_size: __kmp_stksize);
3995	KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3996
3997	KA_TRACE(`20`, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3998	"plain=%u\n",
3999	gtid, __kmp_gtid_from_tid(`0`, root->r.r_hot_team),
4000	root->r.r_hot_team->t.t_id, `0`, KMP_INIT_BARRIER_STATE,
4001	KMP_INIT_BARRIER_STATE));
4002	{ // Initialize barrier data.
4003	int b;
4004	for (b = `0`; b < bs_last_barrier; ++b) {
4005	root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4006	#if USE_DEBUGGER
4007	root_thread->th.th_bar[b].bb.b_worker_arrived = `0`;
4008	#endif
4009	}
4010	}
4011	KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4012	KMP_INIT_BARRIER_STATE);
4013
4014	#if KMP_AFFINITY_SUPPORTED
4015	root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4016	root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4017	root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4018	root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4019	#endif /* KMP_AFFINITY_SUPPORTED */
4020	root_thread->th.th_def_allocator = __kmp_def_allocator;
4021	root_thread->th.th_prev_level = `0`;
4022	root_thread->th.th_prev_num_threads = `1`;
4023
4024	kmp_cg_root_t tmp = (kmp_cg_root_t )__kmp_allocate(sizeof(kmp_cg_root_t));
4025	tmp->cg_root = root_thread;
4026	tmp->cg_thread_limit = __kmp_cg_max_nth;
4027	tmp->cg_nthreads = `1`;
4028	KA_TRACE(`100`, ("__kmp_register_root: Thread %p created node %p with"
4029	" cg_nthreads init to 1\n",
4030	root_thread, tmp));
4031	tmp->up = NULL;
4032	root_thread->th.th_cg_roots = tmp;
4033
4034	__kmp_root_counter++;
4035
4036	#if OMPT_SUPPORT
4037	if (ompt_enabled.enabled) {
4038
4039	kmp_info_t *root_thread = ompt_get_thread();
4040
4041	ompt_set_thread_state(thread: root_thread, state: ompt_state_overhead);
4042
4043	if (ompt_enabled.ompt_callback_thread_begin) {
4044	ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4045	ompt_thread_initial, __ompt_get_thread_data_internal());
4046	}
4047	ompt_data_t *task_data;
4048	ompt_data_t *parallel_data;
4049	__ompt_get_task_info_internal(ancestor_level: `0`, NULL, task_data: &task_data, NULL, parallel_data: &parallel_data,
4050	NULL);
4051	if (ompt_enabled.ompt_callback_implicit_task) {
4052	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4053	ompt_scope_begin, parallel_data, task_data, `1`, `1`, ompt_task_initial);
4054	}
4055
4056	ompt_set_thread_state(thread: root_thread, state: ompt_state_work_serial);
4057	}
4058	#endif
4059	#if OMPD_SUPPORT
4060	if (ompd_state & OMPD_ENABLE_BP)
4061	ompd_bp_thread_begin();
4062	#endif
4063
4064	KMP_MB();
4065	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
4066
4067	return gtid;
4068	}
4069
4070	#if KMP_NESTED_HOT_TEAMS
4071	static int __kmp_free_hot_teams(kmp_root_t root, kmp_info_t thr, int level,
4072	const int max_level) {
4073	int i, n, nth;
4074	kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4075	if (!hot_teams \|\| !hot_teams[level].hot_team) {
4076	return `0`;
4077	}
4078	KMP_DEBUG_ASSERT(level < max_level);
4079	kmp_team_t *team = hot_teams[level].hot_team;
4080	nth = hot_teams[level].hot_team_nth;
4081	n = nth - `1`; // primary thread is not freed
4082	if (level < max_level - `1`) {
4083	for (i = `0`; i < nth; ++i) {
4084	kmp_info_t *th = team->t.t_threads[i];
4085	n += __kmp_free_hot_teams(root, thr: th, level: level + `1`, max_level);
4086	if (i > `0` && th->th.th_hot_teams) {
4087	__kmp_free(th->th.th_hot_teams);
4088	th->th.th_hot_teams = NULL;
4089	}
4090	}
4091	}
4092	__kmp_free_team(root, team, NULL);
4093	return n;
4094	}
4095	#endif
4096
4097	// Resets a root thread and clear its root and hot teams.
4098	// Returns the number of __kmp_threads entries directly and indirectly freed.
4099	static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4100	kmp_team_t *root_team = root->r.r_root_team;
4101	kmp_team_t *hot_team = root->r.r_hot_team;
4102	int n = hot_team->t.t_nproc;
4103	int i;
4104
4105	KMP_DEBUG_ASSERT(!root->r.r_active);
4106
4107	root->r.r_root_team = NULL;
4108	root->r.r_hot_team = NULL;
4109	// __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
4110	// before call to __kmp_free_team().
4111	__kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4112	#if KMP_NESTED_HOT_TEAMS
4113	if (__kmp_hot_teams_max_level >
4114	`0`) { // need to free nested hot teams and their threads if any
4115	for (i = `0`; i < hot_team->t.t_nproc; ++i) {
4116	kmp_info_t *th = hot_team->t.t_threads[i];
4117	if (__kmp_hot_teams_max_level > `1`) {
4118	n += __kmp_free_hot_teams(root, thr: th, level: `1`, max_level: __kmp_hot_teams_max_level);
4119	}
4120	if (th->th.th_hot_teams) {
4121	__kmp_free(th->th.th_hot_teams);
4122	th->th.th_hot_teams = NULL;
4123	}
4124	}
4125	}
4126	#endif
4127	__kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4128
4129	// Before we can reap the thread, we need to make certain that all other
4130	// threads in the teams that had this root as ancestor have stopped trying to
4131	// steal tasks.
4132	if (__kmp_tasking_mode != tskm_immediate_exec) {
4133	__kmp_wait_to_unref_task_teams();
4134	}
4135
4136	#if KMP_OS_WINDOWS
4137	/ Close Handle of root duplicated in __kmp_create_worker (tr #62919) /
4138	KA_TRACE(
4139	`10`, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4140	"\n",
4141	(LPVOID) & (root->r.r_uber_thread->th),
4142	root->r.r_uber_thread->th.th_info.ds.ds_thread));
4143	__kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4144	#endif /* KMP_OS_WINDOWS */
4145
4146	#if OMPD_SUPPORT
4147	if (ompd_state & OMPD_ENABLE_BP)
4148	ompd_bp_thread_end();
4149	#endif
4150
4151	#if OMPT_SUPPORT
4152	ompt_data_t *task_data;
4153	ompt_data_t *parallel_data;
4154	__ompt_get_task_info_internal(ancestor_level: `0`, NULL, task_data: &task_data, NULL, parallel_data: &parallel_data,
4155	NULL);
4156	if (ompt_enabled.ompt_callback_implicit_task) {
4157	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4158	ompt_scope_end, parallel_data, task_data, `0`, `1`, ompt_task_initial);
4159	}
4160	if (ompt_enabled.ompt_callback_thread_end) {
4161	ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4162	&(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4163	}
4164	#endif
4165
4166	TCW_4(__kmp_nth,
4167	__kmp_nth - `1`); // __kmp_reap_thread will decrement __kmp_all_nth.
4168	i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4169	KA_TRACE(`100`, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4170	" to %d\n",
4171	root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4172	root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4173	if (i == `1`) {
4174	// need to free contention group structure
4175	KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4176	root->r.r_uber_thread->th.th_cg_roots->cg_root);
4177	KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4178	__kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4179	root->r.r_uber_thread->th.th_cg_roots = NULL;
4180	}
4181	__kmp_reap_thread(thread: root->r.r_uber_thread, is_root: `1`);
4182
4183	// We canot put root thread to __kmp_thread_pool, so we have to reap it
4184	// instead of freeing.
4185	root->r.r_uber_thread = NULL;
4186	/ mark root as no longer in use /
4187	root->r.r_begin = FALSE;
4188
4189	return n;
4190	}
4191
4192	void __kmp_unregister_root_current_thread(int gtid) {
4193	KA_TRACE(`1`, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4194	/ this lock should be ok, since unregister_root_current_thread is never*
4195	called during an abort, only during a normal close. furthermore, if you
4196	have the forkjoin lock, you should never try to get the initz lock /*
4197	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
4198	if (TCR_4(__kmp_global.g.g_done) \|\| !__kmp_init_serial) {
4199	KC_TRACE(`10`, ("__kmp_unregister_root_current_thread: already finished, "
4200	"exiting T#%d\n",
4201	gtid));
4202	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
4203	return;
4204	}
4205	kmp_root_t *root = __kmp_root[gtid];
4206
4207	KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4208	KMP_ASSERT(KMP_UBER_GTID(gtid));
4209	KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4210	KMP_ASSERT(root->r.r_active == FALSE);
4211
4212	KMP_MB();
4213
4214	kmp_info_t *thread = __kmp_threads[gtid];
4215	kmp_team_t *team = thread->th.th_team;
4216	kmp_task_team_t *task_team = thread->th.th_task_team;
4217
4218	// we need to wait for the proxy tasks before finishing the thread
4219	if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks \|\|
4220	task_team->tt.tt_hidden_helper_task_encountered)) {
4221	#if OMPT_SUPPORT
4222	// the runtime is shutting down so we won't report any events
4223	thread->th.ompt_thread_info.state = ompt_state_undefined;
4224	#endif
4225	__kmp_task_team_wait(this_thr: thread, team USE_ITT_BUILD_ARG(NULL));
4226	}
4227
4228	__kmp_reset_root(gtid, root);
4229
4230	KMP_MB();
4231	KC_TRACE(`10`,
4232	("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4233
4234	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
4235	}
4236
4237	#if KMP_OS_WINDOWS
4238	/ __kmp_forkjoin_lock must be already held*
4239	Unregisters a root thread that is not the current thread. Returns the number
4240	of __kmp_threads entries freed as a result. /*
4241	static int __kmp_unregister_root_other_thread(int gtid) {
4242	kmp_root_t *root = __kmp_root[gtid];
4243	int r;
4244
4245	KA_TRACE(`1`, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4246	KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4247	KMP_ASSERT(KMP_UBER_GTID(gtid));
4248	KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4249	KMP_ASSERT(root->r.r_active == FALSE);
4250
4251	r = __kmp_reset_root(gtid, root);
4252	KC_TRACE(`10`,
4253	("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4254	return r;
4255	}
4256	#endif
4257
4258	#if KMP_DEBUG
4259	void __kmp_task_info() {
4260
4261	kmp_int32 gtid = __kmp_entry_gtid();
4262	kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4263	kmp_info_t *this_thr = __kmp_threads[gtid];
4264	kmp_team_t *steam = this_thr->th.th_serial_team;
4265	kmp_team_t *team = this_thr->th.th_team;
4266
4267	__kmp_printf(
4268	format: "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4269	"ptask=%p\n",
4270	gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4271	team->t.t_implicit_task_taskdata[tid].td_parent);
4272	}
4273	#endif // KMP_DEBUG
4274
4275	/ TODO optimize with one big memclr, take out what isn't needed, split*
4276	responsibility to workers as much as possible, and delay initialization of
4277	features as much as possible /*
4278	static void __kmp_initialize_info(kmp_info_t this_thr, kmp_team_t team,
4279	int tid, int gtid) {
4280	/ this_thr->th.th_info.ds.ds_gtid is setup in*
4281	kmp_allocate_thread/create_worker.
4282	this_thr->th.th_serial_team is setup in __kmp_allocate_thread /*
4283	KMP_DEBUG_ASSERT(this_thr != NULL);
4284	KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4285	KMP_DEBUG_ASSERT(team);
4286	KMP_DEBUG_ASSERT(team->t.t_threads);
4287	KMP_DEBUG_ASSERT(team->t.t_dispatch);
4288	kmp_info_t *master = team->t.t_threads[`0`];
4289	KMP_DEBUG_ASSERT(master);
4290	KMP_DEBUG_ASSERT(master->th.th_root);
4291
4292	KMP_MB();
4293
4294	TCW_SYNC_PTR(this_thr->th.th_team, team);
4295
4296	this_thr->th.th_info.ds.ds_tid = tid;
4297	this_thr->th.th_set_nproc = `0`;
4298	if (__kmp_tasking_mode != tskm_immediate_exec)
4299	// When tasking is possible, threads are not safe to reap until they are
4300	// done tasking; this will be set when tasking code is exited in wait
4301	this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4302	else // no tasking --> always safe to reap
4303	this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4304	this_thr->th.th_set_proc_bind = proc_bind_default;
4305
4306	#if KMP_AFFINITY_SUPPORTED
4307	this_thr->th.th_new_place = this_thr->th.th_current_place;
4308	#endif
4309	this_thr->th.th_root = master->th.th_root;
4310
4311	/ setup the thread's cache of the team structure /
4312	this_thr->th.th_team_nproc = team->t.t_nproc;
4313	this_thr->th.th_team_master = master;
4314	this_thr->th.th_team_serialized = team->t.t_serialized;
4315
4316	KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4317
4318	KF_TRACE(`10`, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4319	tid, gtid, this_thr, this_thr->th.th_current_task));
4320
4321	__kmp_init_implicit_task(loc_ref: this_thr->th.th_team_master->th.th_ident, this_thr,
4322	team, tid, TRUE);
4323
4324	KF_TRACE(`10`, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4325	tid, gtid, this_thr, this_thr->th.th_current_task));
4326	// TODO: Initialize ICVs from parent; GEH - isn't that already done in
4327	// __kmp_initialize_team()?
4328
4329	/ TODO no worksharing in speculative threads /
4330	this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4331
4332	this_thr->th.th_local.this_construct = `0`;
4333
4334	if (!this_thr->th.th_pri_common) {
4335	this_thr->th.th_pri_common =
4336	(struct common_table )__kmp_allocate(sizeof(struct* common_table));
4337	if (__kmp_storage_map) {
4338	__kmp_print_storage_map_gtid(
4339	gtid, p1: this_thr->th.th_pri_common, p2: this_thr->th.th_pri_common + `1`,
4340	size: sizeof(struct common_table), format: "th_%d.th_pri_common\n", gtid);
4341	}
4342	this_thr->th.th_pri_head = NULL;
4343	}
4344
4345	if (this_thr != master && // Primary thread's CG root is initialized elsewhere
4346	this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4347	// Make new thread's CG root same as primary thread's
4348	KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4349	kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4350	if (tmp) {
4351	// worker changes CG, need to check if old CG should be freed
4352	int i = tmp->cg_nthreads--;
4353	KA_TRACE(`100`, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4354	" on node %p of thread %p to %d\n",
4355	this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4356	if (i == `1`) {
4357	__kmp_free(tmp); // last thread left CG --> free it
4358	}
4359	}
4360	this_thr->th.th_cg_roots = master->th.th_cg_roots;
4361	// Increment new thread's CG root's counter to add the new thread
4362	this_thr->th.th_cg_roots->cg_nthreads++;
4363	KA_TRACE(`100`, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4364	" node %p of thread %p to %d\n",
4365	this_thr, this_thr->th.th_cg_roots,
4366	this_thr->th.th_cg_roots->cg_root,
4367	this_thr->th.th_cg_roots->cg_nthreads));
4368	this_thr->th.th_current_task->td_icvs.thread_limit =
4369	this_thr->th.th_cg_roots->cg_thread_limit;
4370	}
4371
4372	/ Initialize dynamic dispatch /
4373	{
4374	volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4375	// Use team max_nproc since this will never change for the team.
4376	size_t disp_size =
4377	sizeof(dispatch_private_info_t) *
4378	(team->t.t_max_nproc == `1` ? `1` : __kmp_dispatch_num_buffers);
4379	KD_TRACE(`10`, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4380	team->t.t_max_nproc));
4381	KMP_ASSERT(dispatch);
4382	KMP_DEBUG_ASSERT(team->t.t_dispatch);
4383	KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4384
4385	dispatch->th_disp_index = `0`;
4386	dispatch->th_doacross_buf_idx = `0`;
4387	if (!dispatch->th_disp_buffer) {
4388	dispatch->th_disp_buffer =
4389	(dispatch_private_info_t *)__kmp_allocate(disp_size);
4390
4391	if (__kmp_storage_map) {
4392	__kmp_print_storage_map_gtid(
4393	gtid, p1: &dispatch->th_disp_buffer[`0`],
4394	p2: &dispatch->th_disp_buffer[team->t.t_max_nproc == `1`
4395	? `1`
4396	: __kmp_dispatch_num_buffers],
4397	size: disp_size,
4398	format: "th_%d.th_dispatch.th_disp_buffer "
4399	"(team_%d.t_dispatch[%d].th_disp_buffer)",
4400	gtid, team->t.t_id, gtid);
4401	}
4402	} else {
4403	memset(s: &dispatch->th_disp_buffer[`0`], c: `'\0'`, n: disp_size);
4404	}
4405
4406	dispatch->th_dispatch_pr_current = `0`;
4407	dispatch->th_dispatch_sh_current = `0`;
4408
4409	dispatch->th_deo_fcn = `0`; / ORDERED /
4410	dispatch->th_dxo_fcn = `0`; / END ORDERED /
4411	}
4412
4413	this_thr->th.th_next_pool = NULL;
4414
4415	KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4416	KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == `0`);
4417
4418	KMP_MB();
4419	}
4420
4421	/ allocate a new thread for the requesting team. this is only called from*
4422	within a forkjoin critical section. we will first try to get an available
4423	thread from the thread pool. if none is available, we will fork a new one
4424	assuming we are able to create a new one. this should be assured, as the
4425	caller should check on this first. /*
4426	kmp_info_t __kmp_allocate_thread(kmp_root_t root, kmp_team_t *team,
4427	int new_tid) {
4428	kmp_team_t *serial_team;
4429	kmp_info_t *new_thr;
4430	int new_gtid;
4431
4432	KA_TRACE(`20`, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4433	KMP_DEBUG_ASSERT(root && team);
4434	#if !KMP_NESTED_HOT_TEAMS
4435	KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4436	#endif
4437	KMP_MB();
4438
4439	/ first, try to get one from the thread pool unless allocating thread is*
4440	* the main hidden helper thread. The hidden helper team should always
4441	* allocate new OS threads. */
4442	if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
4443	new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4444	__kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4445	if (new_thr == __kmp_thread_pool_insert_pt) {
4446	__kmp_thread_pool_insert_pt = NULL;
4447	}
4448	TCW_4(new_thr->th.th_in_pool, FALSE);
4449	__kmp_suspend_initialize_thread(th: new_thr);
4450	__kmp_lock_suspend_mx(th: new_thr);
4451	if (new_thr->th.th_active_in_pool == TRUE) {
4452	KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4453	KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4454	new_thr->th.th_active_in_pool = FALSE;
4455	}
4456	__kmp_unlock_suspend_mx(th: new_thr);
4457
4458	KA_TRACE(`20`, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4459	__kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4460	KMP_ASSERT(!new_thr->th.th_team);
4461	KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4462
4463	/ setup the thread structure /
4464	__kmp_initialize_info(this_thr: new_thr, team, tid: new_tid,
4465	gtid: new_thr->th.th_info.ds.ds_gtid);
4466	KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4467
4468	TCW_4(__kmp_nth, __kmp_nth + `1`);
4469
4470	new_thr->th.th_task_state = `0`;
4471
4472	if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4473	// Make sure pool thread has transitioned to waiting on own thread struct
4474	KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == `0`);
4475	// Thread activated in __kmp_allocate_team when increasing team size
4476	}
4477
4478	#ifdef KMP_ADJUST_BLOCKTIME
4479	/ Adjust blocktime back to zero if necessary /
4480	/ Middle initialization might not have occurred yet /
4481	if (!__kmp_env_blocktime && (__kmp_avail_proc > `0`)) {
4482	if (__kmp_nth > __kmp_avail_proc) {
4483	__kmp_zero_bt = TRUE;
4484	}
4485	}
4486	#endif /* KMP_ADJUST_BLOCKTIME */
4487
4488	#if KMP_DEBUG
4489	// If thread entered pool via __kmp_free_thread, wait_flag should !=
4490	// KMP_BARRIER_PARENT_FLAG.
4491	int b;
4492	kmp_balign_t *balign = new_thr->th.th_bar;
4493	for (b = `0`; b < bs_last_barrier; ++b)
4494	KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4495	#endif
4496
4497	KF_TRACE(`10`, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4498	__kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4499
4500	KMP_MB();
4501	return new_thr;
4502	}
4503
4504	/ no, well fork a new one /
4505	KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) \|\| __kmp_nth == __kmp_all_nth);
4506	KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4507
4508	#if KMP_USE_MONITOR
4509	// If this is the first worker thread the RTL is creating, then also
4510	// launch the monitor thread. We try to do this as early as possible.
4511	if (!TCR_4(__kmp_init_monitor)) {
4512	__kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4513	if (!TCR_4(__kmp_init_monitor)) {
4514	KF_TRACE(`10`, ("before __kmp_create_monitor\n"));
4515	TCW_4(__kmp_init_monitor, `1`);
4516	__kmp_create_monitor(&__kmp_monitor);
4517	KF_TRACE(`10`, ("after __kmp_create_monitor\n"));
4518	#if KMP_OS_WINDOWS
4519	// AC: wait until monitor has started. This is a fix for CQ232808.
4520	// The reason is that if the library is loaded/unloaded in a loop with
4521	// small (parallel) work in between, then there is high probability that
4522	// monitor thread started after the library shutdown. At shutdown it is
4523	// too late to cope with the problem, because when the primary thread is
4524	// in DllMain (process detach) the monitor has no chances to start (it is
4525	// blocked), and primary thread has no means to inform the monitor that
4526	// the library has gone, because all the memory which the monitor can
4527	// access is going to be released/reset.
4528	while (TCR_4(__kmp_init_monitor) < `2`) {
4529	KMP_YIELD(TRUE);
4530	}
4531	KF_TRACE(`10`, ("after monitor thread has started\n"));
4532	#endif
4533	}
4534	__kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4535	}
4536	#endif
4537
4538	KMP_MB();
4539
4540	{
4541	int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4542	? `1`
4543	: __kmp_hidden_helper_threads_num + `1`;
4544
4545	for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4546	++new_gtid) {
4547	KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4548	}
4549
4550	if (TCR_4(__kmp_init_hidden_helper_threads)) {
4551	KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4552	}
4553	}
4554
4555	/ allocate space for it. /
4556	new_thr = (kmp_info_t )__kmp_allocate(sizeof*(kmp_info_t));
4557
4558	new_thr->th.th_nt_strict = false;
4559	new_thr->th.th_nt_loc = NULL;
4560	new_thr->th.th_nt_sev = severity_fatal;
4561	new_thr->th.th_nt_msg = NULL;
4562
4563	TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4564
4565	#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4566	// suppress race conditions detection on synchronization flags in debug mode
4567	// this helps to analyze library internals eliminating false positives
4568	__itt_suppress_mark_range(
4569	__itt_suppress_range, __itt_suppress_threading_errors,
4570	&new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4571	__itt_suppress_mark_range(
4572	__itt_suppress_range, __itt_suppress_threading_errors,
4573	&new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4574	#if KMP_OS_WINDOWS
4575	__itt_suppress_mark_range(
4576	__itt_suppress_range, __itt_suppress_threading_errors,
4577	&new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4578	#else
4579	__itt_suppress_mark_range(__itt_suppress_range,
4580	__itt_suppress_threading_errors,
4581	&new_thr->th.th_suspend_init_count,
4582	sizeof(new_thr->th.th_suspend_init_count));
4583	#endif
4584	// TODO: check if we need to also suppress b_arrived flags
4585	__itt_suppress_mark_range(__itt_suppress_range,
4586	__itt_suppress_threading_errors,
4587	CCAST(kmp_uint64 *, &new_thr->th.th_bar[`0`].bb.b_go),
4588	sizeof(new_thr->th.th_bar[`0`].bb.b_go));
4589	__itt_suppress_mark_range(__itt_suppress_range,
4590	__itt_suppress_threading_errors,
4591	CCAST(kmp_uint64 *, &new_thr->th.th_bar[`1`].bb.b_go),
4592	sizeof(new_thr->th.th_bar[`1`].bb.b_go));
4593	__itt_suppress_mark_range(__itt_suppress_range,
4594	__itt_suppress_threading_errors,
4595	CCAST(kmp_uint64 *, &new_thr->th.th_bar[`2`].bb.b_go),
4596	sizeof(new_thr->th.th_bar[`2`].bb.b_go));
4597	#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
4598	if (__kmp_storage_map) {
4599	__kmp_print_thread_storage_map(thr: new_thr, gtid: new_gtid);
4600	}
4601
4602	// add the reserve serialized team, initialized from the team's primary thread
4603	{
4604	kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4605	KF_TRACE(`10`, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4606	new_thr->th.th_serial_team = serial_team =
4607	(kmp_team_t *)__kmp_allocate_team(root, new_nproc: `1`, max_nproc: `1`,
4608	#if OMPT_SUPPORT
4609	ompt_data_none, // root parallel id
4610	#endif
4611	proc_bind: proc_bind_default, new_icvs: &r_icvs,
4612	argc: `0` USE_NESTED_HOT_ARG(NULL));
4613	}
4614	KMP_ASSERT(serial_team);
4615	serial_team->t.t_serialized = `0`; // AC: the team created in reserve, not for
4616	// execution (it is unused for now).
4617	serial_team->t.t_threads[`0`] = new_thr;
4618	KF_TRACE(`10`,
4619	("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4620	new_thr));
4621
4622	/ setup the thread structures /
4623	__kmp_initialize_info(this_thr: new_thr, team, tid: new_tid, gtid: new_gtid);
4624
4625	#if USE_FAST_MEMORY
4626	__kmp_initialize_fast_memory(this_thr: new_thr);
4627	#endif /* USE_FAST_MEMORY */
4628
4629	#if KMP_USE_BGET
4630	KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4631	__kmp_initialize_bget(th: new_thr);
4632	#endif
4633
4634	__kmp_init_random(thread: new_thr); // Initialize random number generator
4635
4636	/ Initialize these only once when thread is grabbed for a team allocation /
4637	KA_TRACE(`20`,
4638	("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4639	__kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4640
4641	int b;
4642	kmp_balign_t *balign = new_thr->th.th_bar;
4643	for (b = `0`; b < bs_last_barrier; ++b) {
4644	balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4645	balign[b].bb.team = NULL;
4646	balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4647	balign[b].bb.use_oncore_barrier = `0`;
4648	}
4649
4650	TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4651	new_thr->th.th_sleep_loc_type = flag_unset;
4652
4653	new_thr->th.th_spin_here = FALSE;
4654	new_thr->th.th_next_waiting = `0`;
4655	#if KMP_OS_UNIX
4656	new_thr->th.th_blocking = false;
4657	#endif
4658
4659	#if KMP_AFFINITY_SUPPORTED
4660	new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4661	new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4662	new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4663	new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4664	#endif
4665	new_thr->th.th_def_allocator = __kmp_def_allocator;
4666	new_thr->th.th_prev_level = `0`;
4667	new_thr->th.th_prev_num_threads = `1`;
4668
4669	TCW_4(new_thr->th.th_in_pool, FALSE);
4670	new_thr->th.th_active_in_pool = FALSE;
4671	TCW_4(new_thr->th.th_active, TRUE);
4672
4673	new_thr->th.th_set_nested_nth = NULL;
4674	new_thr->th.th_set_nested_nth_sz = `0`;
4675
4676	/ adjust the global counters /
4677	__kmp_all_nth++;
4678	__kmp_nth++;
4679
4680	// if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4681	// numbers of procs, and method #2 (keyed API call) for higher numbers.
4682	if (__kmp_adjust_gtid_mode) {
4683	if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4684	if (TCR_4(__kmp_gtid_mode) != `2`) {
4685	TCW_4(__kmp_gtid_mode, `2`);
4686	}
4687	} else {
4688	if (TCR_4(__kmp_gtid_mode) != `1`) {
4689	TCW_4(__kmp_gtid_mode, `1`);
4690	}
4691	}
4692	}
4693
4694	#ifdef KMP_ADJUST_BLOCKTIME
4695	/ Adjust blocktime back to zero if necessary /
4696	/ Middle initialization might not have occurred yet /
4697	if (!__kmp_env_blocktime && (__kmp_avail_proc > `0`)) {
4698	if (__kmp_nth > __kmp_avail_proc) {
4699	__kmp_zero_bt = TRUE;
4700	}
4701	}
4702	#endif /* KMP_ADJUST_BLOCKTIME */
4703
4704	#if KMP_AFFINITY_SUPPORTED
4705	// Set the affinity and topology information for new thread
4706	__kmp_affinity_set_init_mask(gtid: new_gtid, /isa_root=/FALSE);
4707	#endif
4708
4709	/ actually fork it and create the new worker thread /
4710	KF_TRACE(
4711	`10`, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4712	__kmp_create_worker(gtid: new_gtid, th: new_thr, stack_size: __kmp_stksize);
4713	KF_TRACE(`10`,
4714	("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4715
4716	KA_TRACE(`20`, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4717	new_gtid));
4718	KMP_MB();
4719	return new_thr;
4720	}
4721
4722	/ Reinitialize team for reuse.*
4723	The hot team code calls this case at every fork barrier, so EPCC barrier
4724	test are extremely sensitive to changes in it, esp. writes to the team
4725	struct, which cause a cache invalidation in all threads.
4726	IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! /*
4727	static void __kmp_reinitialize_team(kmp_team_t *team,
4728	kmp_internal_control_t *new_icvs,
4729	ident_t *loc) {
4730	KF_TRACE(`10`, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4731	team->t.t_threads[`0`], team));
4732	KMP_DEBUG_ASSERT(team && new_icvs);
4733	KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) \|\| new_icvs->nproc);
4734	KMP_CHECK_UPDATE(team->t.t_ident, loc);
4735
4736	KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4737	// Copy ICVs to the primary thread's implicit taskdata
4738	__kmp_init_implicit_task(loc_ref: loc, this_thr: team->t.t_threads[`0`], team, tid: `0`, FALSE);
4739	copy_icvs(dst: &team->t.t_implicit_task_taskdata[`0`].td_icvs, src: new_icvs);
4740
4741	KF_TRACE(`10`, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4742	team->t.t_threads[`0`], team));
4743	}
4744
4745	/ Initialize the team data structure.*
4746	This assumes the t_threads and t_max_nproc are already set.
4747	Also, we don't touch the arguments /*
4748	static void __kmp_initialize_team(kmp_team_t team, int* new_nproc,
4749	kmp_internal_control_t *new_icvs,
4750	ident_t *loc) {
4751	KF_TRACE(`10`, ("__kmp_initialize_team: enter: team=%p\n", team));
4752
4753	/ verify /
4754	KMP_DEBUG_ASSERT(team);
4755	KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4756	KMP_DEBUG_ASSERT(team->t.t_threads);
4757	KMP_MB();
4758
4759	team->t.t_master_tid = `0`; / not needed /
4760	/ team->t.t_master_bar; not needed /
4761	team->t.t_serialized = new_nproc > `1` ? `0` : `1`;
4762	team->t.t_nproc = new_nproc;
4763
4764	/ team->t.t_parent = NULL; TODO not needed & would mess up hot team /
4765	team->t.t_next_pool = NULL;
4766	/ memset( team->t.t_threads, 0, sizeof(kmp_info_t)new_nproc ); would mess*
4767	* up hot team */
4768
4769	TCW_SYNC_PTR(team->t.t_pkfn, NULL); / not needed /
4770	team->t.t_invoke = NULL; / not needed /
4771
4772	// TODO???: team->t.t_max_active_levels = new_max_active_levels;
4773	team->t.t_sched.sched = new_icvs->sched.sched;
4774
4775	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
4776	team->t.t_fp_control_saved = FALSE; / not needed /
4777	team->t.t_x87_fpu_control_word = `0`; / not needed /
4778	team->t.t_mxcsr = `0`; / not needed /
4779	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
4780
4781	team->t.t_construct = `0`;
4782
4783	team->t.t_ordered.dt.t_value = `0`;
4784	team->t.t_master_active = FALSE;
4785
4786	#ifdef KMP_DEBUG
4787	team->t.t_copypriv_data = NULL; / not necessary, but nice for debugging /
4788	#endif
4789	#if KMP_OS_WINDOWS
4790	team->t.t_copyin_counter = `0`; / for barrier-free copyin implementation /
4791	#endif
4792
4793	team->t.t_control_stack_top = NULL;
4794
4795	__kmp_reinitialize_team(team, new_icvs, loc);
4796
4797	KMP_MB();
4798	KF_TRACE(`10`, ("__kmp_initialize_team: exit: team=%p\n", team));
4799	}
4800
4801	#if KMP_AFFINITY_SUPPORTED
4802	static inline void __kmp_set_thread_place(kmp_team_t team, kmp_info_t th,
4803	int first, int last, int newp) {
4804	th->th.th_first_place = first;
4805	th->th.th_last_place = last;
4806	th->th.th_new_place = newp;
4807	if (newp != th->th.th_current_place) {
4808	if (__kmp_display_affinity && team->t.t_display_affinity != `1`)
4809	team->t.t_display_affinity = `1`;
4810	// Copy topology information associated with the new place
4811	th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4812	th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
4813	}
4814	}
4815
4816	// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4817	// It calculates the worker + primary thread's partition based upon the parent
4818	// thread's partition, and binds each worker to a thread in their partition.
4819	// The primary thread's partition should already include its current binding.
4820	static void __kmp_partition_places(kmp_team_t team, int* update_master_only) {
4821	// Do not partition places for the hidden helper team
4822	if (KMP_HIDDEN_HELPER_TEAM(team))
4823	return;
4824	// Copy the primary thread's place partition to the team struct
4825	kmp_info_t *master_th = team->t.t_threads[`0`];
4826	KMP_DEBUG_ASSERT(master_th != NULL);
4827	kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4828	int first_place = master_th->th.th_first_place;
4829	int last_place = master_th->th.th_last_place;
4830	int masters_place = master_th->th.th_current_place;
4831	int num_masks = __kmp_affinity.num_masks;
4832	team->t.t_first_place = first_place;
4833	team->t.t_last_place = last_place;
4834
4835	KA_TRACE(`20`, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4836	"bound to place %d partition = [%d,%d]\n",
4837	proc_bind, __kmp_gtid_from_thread(team->t.t_threads[`0`]),
4838	team->t.t_id, masters_place, first_place, last_place));
4839
4840	switch (proc_bind) {
4841
4842	case proc_bind_default:
4843	// Serial teams might have the proc_bind policy set to proc_bind_default.
4844	// Not an issue -- we don't rebind primary thread for any proc_bind policy.
4845	KMP_DEBUG_ASSERT(team->t.t_nproc == `1`);
4846	break;
4847
4848	case proc_bind_primary: {
4849	int f;
4850	int n_th = team->t.t_nproc;
4851	for (f = `1`; f < n_th; f++) {
4852	kmp_info_t *th = team->t.t_threads[f];
4853	KMP_DEBUG_ASSERT(th != NULL);
4854	__kmp_set_thread_place(team, th, first: first_place, last: last_place, newp: masters_place);
4855
4856	KA_TRACE(`100`, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4857	"partition = [%d,%d]\n",
4858	__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4859	f, masters_place, first_place, last_place));
4860	}
4861	} break;
4862
4863	case proc_bind_close: {
4864	int f;
4865	int n_th = team->t.t_nproc;
4866	int n_places;
4867	if (first_place <= last_place) {
4868	n_places = last_place - first_place + `1`;
4869	} else {
4870	n_places = num_masks - first_place + last_place + `1`;
4871	}
4872	if (n_th <= n_places) {
4873	int place = masters_place;
4874	for (f = `1`; f < n_th; f++) {
4875	kmp_info_t *th = team->t.t_threads[f];
4876	KMP_DEBUG_ASSERT(th != NULL);
4877
4878	if (place == last_place) {
4879	place = first_place;
4880	} else if (place == (num_masks - `1`)) {
4881	place = `0`;
4882	} else {
4883	place++;
4884	}
4885	__kmp_set_thread_place(team, th, first: first_place, last: last_place, newp: place);
4886
4887	KA_TRACE(`100`, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4888	"partition = [%d,%d]\n",
4889	__kmp_gtid_from_thread(team->t.t_threads[f]),
4890	team->t.t_id, f, place, first_place, last_place));
4891	}
4892	} else {
4893	int S, rem, gap, s_count;
4894	S = n_th / n_places;
4895	s_count = `0`;
4896	rem = n_th - (S * n_places);
4897	gap = rem > `0` ? n_places / rem : n_places;
4898	int place = masters_place;
4899	int gap_ct = gap;
4900	for (f = `0`; f < n_th; f++) {
4901	kmp_info_t *th = team->t.t_threads[f];
4902	KMP_DEBUG_ASSERT(th != NULL);
4903
4904	__kmp_set_thread_place(team, th, first: first_place, last: last_place, newp: place);
4905	s_count++;
4906
4907	if ((s_count == S) && rem && (gap_ct == gap)) {
4908	// do nothing, add an extra thread to place on next iteration
4909	} else if ((s_count == S + `1`) && rem && (gap_ct == gap)) {
4910	// we added an extra thread to this place; move to next place
4911	if (place == last_place) {
4912	place = first_place;
4913	} else if (place == (num_masks - `1`)) {
4914	place = `0`;
4915	} else {
4916	place++;
4917	}
4918	s_count = `0`;
4919	gap_ct = `1`;
4920	rem--;
4921	} else if (s_count == S) { // place full; don't add extra
4922	if (place == last_place) {
4923	place = first_place;
4924	} else if (place == (num_masks - `1`)) {
4925	place = `0`;
4926	} else {
4927	place++;
4928	}
4929	gap_ct++;
4930	s_count = `0`;
4931	}
4932
4933	KA_TRACE(`100`,
4934	("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4935	"partition = [%d,%d]\n",
4936	__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4937	th->th.th_new_place, first_place, last_place));
4938	}
4939	KMP_DEBUG_ASSERT(place == masters_place);
4940	}
4941	} break;
4942
4943	case proc_bind_spread: {
4944	int f;
4945	int n_th = team->t.t_nproc;
4946	int n_places;
4947	int thidx;
4948	if (first_place <= last_place) {
4949	n_places = last_place - first_place + `1`;
4950	} else {
4951	n_places = num_masks - first_place + last_place + `1`;
4952	}
4953	if (n_th <= n_places) {
4954	int place = -`1`;
4955
4956	if (n_places != num_masks) {
4957	int S = n_places / n_th;
4958	int s_count, rem, gap, gap_ct;
4959
4960	place = masters_place;
4961	rem = n_places - n_th * S;
4962	gap = rem ? n_th / rem : `1`;
4963	gap_ct = gap;
4964	thidx = n_th;
4965	if (update_master_only == `1`)
4966	thidx = `1`;
4967	for (f = `0`; f < thidx; f++) {
4968	kmp_info_t *th = team->t.t_threads[f];
4969	KMP_DEBUG_ASSERT(th != NULL);
4970
4971	int fplace = place, nplace = place;
4972	s_count = `1`;
4973	while (s_count < S) {
4974	if (place == last_place) {
4975	place = first_place;
4976	} else if (place == (num_masks - `1`)) {
4977	place = `0`;
4978	} else {
4979	place++;
4980	}
4981	s_count++;
4982	}
4983	if (rem && (gap_ct == gap)) {
4984	if (place == last_place) {
4985	place = first_place;
4986	} else if (place == (num_masks - `1`)) {
4987	place = `0`;
4988	} else {
4989	place++;
4990	}
4991	rem--;
4992	gap_ct = `0`;
4993	}
4994	__kmp_set_thread_place(team, th, first: fplace, last: place, newp: nplace);
4995	gap_ct++;
4996
4997	if (place == last_place) {
4998	place = first_place;
4999	} else if (place == (num_masks - `1`)) {
5000	place = `0`;
5001	} else {
5002	place++;
5003	}
5004
5005	KA_TRACE(`100`,
5006	("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5007	"partition = [%d,%d], num_masks: %u\n",
5008	__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5009	f, th->th.th_new_place, th->th.th_first_place,
5010	th->th.th_last_place, num_masks));
5011	}
5012	} else {
5013	/ Having uniform space of available computation places I can create*
5014	T partitions of round(P/T) size and put threads into the first
5015	place of each partition. /*
5016	double current = static_cast<double>(masters_place);
5017	double spacing =
5018	(static_cast<double>(n_places + `1`) / static_cast<double>(n_th));
5019	int first, last;
5020	kmp_info_t *th;
5021
5022	thidx = n_th + `1`;
5023	if (update_master_only == `1`)
5024	thidx = `1`;
5025	for (f = `0`; f < thidx; f++) {
5026	first = static_cast<int>(current);
5027	last = static_cast<int>(current + spacing) - `1`;
5028	KMP_DEBUG_ASSERT(last >= first);
5029	if (first >= n_places) {
5030	if (masters_place) {
5031	first -= n_places;
5032	last -= n_places;
5033	if (first == (masters_place + `1`)) {
5034	KMP_DEBUG_ASSERT(f == n_th);
5035	first--;
5036	}
5037	if (last == masters_place) {
5038	KMP_DEBUG_ASSERT(f == (n_th - `1`));
5039	last--;
5040	}
5041	} else {
5042	KMP_DEBUG_ASSERT(f == n_th);
5043	first = `0`;
5044	last = `0`;
5045	}
5046	}
5047	if (last >= n_places) {
5048	last = (n_places - `1`);
5049	}
5050	place = first;
5051	current += spacing;
5052	if (f < n_th) {
5053	KMP_DEBUG_ASSERT(`0` <= first);
5054	KMP_DEBUG_ASSERT(n_places > first);
5055	KMP_DEBUG_ASSERT(`0` <= last);
5056	KMP_DEBUG_ASSERT(n_places > last);
5057	KMP_DEBUG_ASSERT(last_place >= first_place);
5058	th = team->t.t_threads[f];
5059	KMP_DEBUG_ASSERT(th);
5060	__kmp_set_thread_place(team, th, first, last, newp: place);
5061	KA_TRACE(`100`,
5062	("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5063	"partition = [%d,%d], spacing = %.4f\n",
5064	__kmp_gtid_from_thread(team->t.t_threads[f]),
5065	team->t.t_id, f, th->th.th_new_place,
5066	th->th.th_first_place, th->th.th_last_place, spacing));
5067	}
5068	}
5069	}
5070	KMP_DEBUG_ASSERT(update_master_only \|\| place == masters_place);
5071	} else {
5072	int S, rem, gap, s_count;
5073	S = n_th / n_places;
5074	s_count = `0`;
5075	rem = n_th - (S * n_places);
5076	gap = rem > `0` ? n_places / rem : n_places;
5077	int place = masters_place;
5078	int gap_ct = gap;
5079	thidx = n_th;
5080	if (update_master_only == `1`)
5081	thidx = `1`;
5082	for (f = `0`; f < thidx; f++) {
5083	kmp_info_t *th = team->t.t_threads[f];
5084	KMP_DEBUG_ASSERT(th != NULL);
5085
5086	__kmp_set_thread_place(team, th, first: place, last: place, newp: place);
5087	s_count++;
5088
5089	if ((s_count == S) && rem && (gap_ct == gap)) {
5090	// do nothing, add an extra thread to place on next iteration
5091	} else if ((s_count == S + `1`) && rem && (gap_ct == gap)) {
5092	// we added an extra thread to this place; move on to next place
5093	if (place == last_place) {
5094	place = first_place;
5095	} else if (place == (num_masks - `1`)) {
5096	place = `0`;
5097	} else {
5098	place++;
5099	}
5100	s_count = `0`;
5101	gap_ct = `1`;
5102	rem--;
5103	} else if (s_count == S) { // place is full; don't add extra thread
5104	if (place == last_place) {
5105	place = first_place;
5106	} else if (place == (num_masks - `1`)) {
5107	place = `0`;
5108	} else {
5109	place++;
5110	}
5111	gap_ct++;
5112	s_count = `0`;
5113	}
5114
5115	KA_TRACE(`100`, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5116	"partition = [%d,%d]\n",
5117	__kmp_gtid_from_thread(team->t.t_threads[f]),
5118	team->t.t_id, f, th->th.th_new_place,
5119	th->th.th_first_place, th->th.th_last_place));
5120	}
5121	KMP_DEBUG_ASSERT(update_master_only \|\| place == masters_place);
5122	}
5123	} break;
5124
5125	default:
5126	break;
5127	}
5128
5129	KA_TRACE(`20`, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
5130	}
5131
5132	#endif // KMP_AFFINITY_SUPPORTED
5133
5134	/ allocate a new team data structure to use. take one off of the free pool if*
5135	available /*
5136	kmp_team_t *
5137	__kmp_allocate_team(kmp_root_t root, int* new_nproc, int max_nproc,
5138	#if OMPT_SUPPORT
5139	ompt_data_t ompt_parallel_data,
5140	#endif
5141	kmp_proc_bind_t new_proc_bind,
5142	kmp_internal_control_t *new_icvs,
5143	int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5144	KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5145	int f;
5146	kmp_team_t *team;
5147	int use_hot_team = !root->r.r_active;
5148	int level = `0`;
5149	int do_place_partition = `1`;
5150
5151	KA_TRACE(`20`, ("__kmp_allocate_team: called\n"));
5152	KMP_DEBUG_ASSERT(new_nproc >= `1` && argc >= `0`);
5153	KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5154	KMP_MB();
5155
5156	#if KMP_NESTED_HOT_TEAMS
5157	kmp_hot_team_ptr_t *hot_teams;
5158	if (master) {
5159	team = master->th.th_team;
5160	level = team->t.t_active_level;
5161	if (master->th.th_teams_microtask) { // in teams construct?
5162	if (master->th.th_teams_size.nteams > `1` &&
5163	( // #teams > 1
5164	team->t.t_pkfn ==
5165	(microtask_t)__kmp_teams_master \|\| // inner fork of the teams
5166	master->th.th_teams_level <
5167	team->t.t_level)) { // or nested parallel inside the teams
5168	++level; // not increment if #teams==1, or for outer fork of the teams;
5169	// increment otherwise
5170	}
5171	// Do not perform the place partition if inner fork of the teams
5172	// Wait until nested parallel region encountered inside teams construct
5173	if ((master->th.th_teams_size.nteams == `1` &&
5174	master->th.th_teams_level >= team->t.t_level) \|\|
5175	(team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5176	do_place_partition = `0`;
5177	}
5178	hot_teams = master->th.th_hot_teams;
5179	if (level < __kmp_hot_teams_max_level && hot_teams &&
5180	hot_teams[level].hot_team) {
5181	// hot team has already been allocated for given level
5182	use_hot_team = `1`;
5183	} else {
5184	use_hot_team = `0`;
5185	}
5186	} else {
5187	// check we won't access uninitialized hot_teams, just in case
5188	KMP_DEBUG_ASSERT(new_nproc == `1`);
5189	}
5190	#endif
5191	// Optimization to use a "hot" team
5192	if (use_hot_team && new_nproc > `1`) {
5193	KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5194	#if KMP_NESTED_HOT_TEAMS
5195	team = hot_teams[level].hot_team;
5196	#else
5197	team = root->r.r_hot_team;
5198	#endif
5199	#if KMP_DEBUG
5200	if (__kmp_tasking_mode != tskm_immediate_exec) {
5201	KA_TRACE(`20`, ("__kmp_allocate_team: hot team task_team[0] = %p "
5202	"task_team[1] = %p before reinit\n",
5203	team->t.t_task_team[`0`], team->t.t_task_team[`1`]));
5204	}
5205	#endif
5206
5207	if (team->t.t_nproc != new_nproc &&
5208	__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5209	// Distributed barrier may need a resize
5210	int old_nthr = team->t.t_nproc;
5211	__kmp_resize_dist_barrier(team, old_nthreads: old_nthr, new_nthreads: new_nproc);
5212	}
5213
5214	// If not doing the place partition, then reset the team's proc bind
5215	// to indicate that partitioning of all threads still needs to take place
5216	if (do_place_partition == `0`)
5217	team->t.t_proc_bind = proc_bind_default;
5218	// Has the number of threads changed?
5219	/ Let's assume the most common case is that the number of threads is*
5220	unchanged, and put that case first. /*
5221	if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
5222	KA_TRACE(`20`, ("__kmp_allocate_team: reusing hot team\n"));
5223	// This case can mean that omp_set_num_threads() was called and the hot
5224	// team size was already reduced, so we check the special flag
5225	if (team->t.t_size_changed == -`1`) {
5226	team->t.t_size_changed = `1`;
5227	} else {
5228	KMP_CHECK_UPDATE(team->t.t_size_changed, `0`);
5229	}
5230
5231	// TODO???: team->t.t_max_active_levels = new_max_active_levels;
5232	kmp_r_sched_t new_sched = new_icvs->sched;
5233	// set primary thread's schedule as new run-time schedule
5234	KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5235
5236	__kmp_reinitialize_team(team, new_icvs,
5237	loc: root->r.r_uber_thread->th.th_ident);
5238
5239	KF_TRACE(`10`, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", `0`,
5240	team->t.t_threads[`0`], team));
5241	__kmp_push_current_task_to_thread(this_thr: team->t.t_threads[`0`], team, tid: `0`);
5242
5243	#if KMP_AFFINITY_SUPPORTED
5244	if ((team->t.t_size_changed == `0`) &&
5245	(team->t.t_proc_bind == new_proc_bind)) {
5246	if (new_proc_bind == proc_bind_spread) {
5247	if (do_place_partition) {
5248	// add flag to update only master for spread
5249	__kmp_partition_places(team, update_master_only: `1`);
5250	}
5251	}
5252	KA_TRACE(`200`, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5253	"proc_bind = %d, partition = [%d,%d]\n",
5254	team->t.t_id, new_proc_bind, team->t.t_first_place,
5255	team->t.t_last_place));
5256	} else {
5257	if (do_place_partition) {
5258	KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5259	__kmp_partition_places(team);
5260	}
5261	}
5262	#else
5263	KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5264	#endif /* KMP_AFFINITY_SUPPORTED */
5265	} else if (team->t.t_nproc > new_nproc) {
5266	KA_TRACE(`20`,
5267	("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5268	new_nproc));
5269
5270	team->t.t_size_changed = `1`;
5271	if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5272	// Barrier size already reduced earlier in this function
5273	// Activate team threads via th_used_in_team
5274	__kmp_add_threads_to_team(team, new_nthreads: new_nproc);
5275	}
5276	// When decreasing team size, threads no longer in the team should
5277	// unref task team.
5278	if (__kmp_tasking_mode != tskm_immediate_exec) {
5279	for (f = new_nproc; f < team->t.t_nproc; f++) {
5280	kmp_info_t *th = team->t.t_threads[f];
5281	KMP_DEBUG_ASSERT(th);
5282	th->th.th_task_team = NULL;
5283	}
5284	}
5285	#if KMP_NESTED_HOT_TEAMS
5286	if (__kmp_hot_teams_mode == `0`) {
5287	// AC: saved number of threads should correspond to team's value in this
5288	// mode, can be bigger in mode 1, when hot team has threads in reserve
5289	KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5290	hot_teams[level].hot_team_nth = new_nproc;
5291	#endif // KMP_NESTED_HOT_TEAMS
5292	/ release the extra threads we don't need any more /
5293	for (f = new_nproc; f < team->t.t_nproc; f++) {
5294	KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5295	__kmp_free_thread(team->t.t_threads[f]);
5296	team->t.t_threads[f] = NULL;
5297	}
5298	#if KMP_NESTED_HOT_TEAMS
5299	} // (__kmp_hot_teams_mode == 0)
5300	else {
5301	// When keeping extra threads in team, switch threads to wait on own
5302	// b_go flag
5303	for (f = new_nproc; f < team->t.t_nproc; ++f) {
5304	KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5305	kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5306	for (int b = `0`; b < bs_last_barrier; ++b) {
5307	if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5308	balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5309	}
5310	KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, `0`);
5311	}
5312	}
5313	}
5314	#endif // KMP_NESTED_HOT_TEAMS
5315	team->t.t_nproc = new_nproc;
5316	// TODO???: team->t.t_max_active_levels = new_max_active_levels;
5317	KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5318	__kmp_reinitialize_team(team, new_icvs,
5319	loc: root->r.r_uber_thread->th.th_ident);
5320
5321	// Update remaining threads
5322	for (f = `0`; f < new_nproc; ++f) {
5323	team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5324	}
5325
5326	// restore the current task state of the primary thread: should be the
5327	// implicit task
5328	KF_TRACE(`10`, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", `0`,
5329	team->t.t_threads[`0`], team));
5330
5331	__kmp_push_current_task_to_thread(this_thr: team->t.t_threads[`0`], team, tid: `0`);
5332
5333	#ifdef KMP_DEBUG
5334	for (f = `0`; f < team->t.t_nproc; f++) {
5335	KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5336	team->t.t_threads[f]->th.th_team_nproc ==
5337	team->t.t_nproc);
5338	}
5339	#endif
5340
5341	if (do_place_partition) {
5342	KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5343	#if KMP_AFFINITY_SUPPORTED
5344	__kmp_partition_places(team);
5345	#endif
5346	}
5347	} else { // team->t.t_nproc < new_nproc
5348
5349	KA_TRACE(`20`,
5350	("__kmp_allocate_team: increasing hot team thread count to %d\n",
5351	new_nproc));
5352	int old_nproc = team->t.t_nproc; // save old value and use to update only
5353	team->t.t_size_changed = `1`;
5354
5355	#if KMP_NESTED_HOT_TEAMS
5356	int avail_threads = hot_teams[level].hot_team_nth;
5357	if (new_nproc < avail_threads)
5358	avail_threads = new_nproc;
5359	kmp_info_t **other_threads = team->t.t_threads;
5360	for (f = team->t.t_nproc; f < avail_threads; ++f) {
5361	// Adjust barrier data of reserved threads (if any) of the team
5362	// Other data will be set in __kmp_initialize_info() below.
5363	int b;
5364	kmp_balign_t *balign = other_threads[f]->th.th_bar;
5365	for (b = `0`; b < bs_last_barrier; ++b) {
5366	balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5367	KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5368	#if USE_DEBUGGER
5369	balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5370	#endif
5371	}
5372	}
5373	if (hot_teams[level].hot_team_nth >= new_nproc) {
5374	// we have all needed threads in reserve, no need to allocate any
5375	// this only possible in mode 1, cannot have reserved threads in mode 0
5376	KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == `1`);
5377	team->t.t_nproc = new_nproc; // just get reserved threads involved
5378	} else {
5379	// We may have some threads in reserve, but not enough;
5380	// get reserved threads involved if any.
5381	team->t.t_nproc = hot_teams[level].hot_team_nth;
5382	hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5383	#endif // KMP_NESTED_HOT_TEAMS
5384	if (team->t.t_max_nproc < new_nproc) {
5385	/ reallocate larger arrays /
5386	__kmp_reallocate_team_arrays(team, max_nth: new_nproc);
5387	__kmp_reinitialize_team(team, new_icvs, NULL);
5388	}
5389
5390	#if (KMP_OS_LINUX \|\| KMP_OS_FREEBSD \|\| KMP_OS_NETBSD \|\| KMP_OS_DRAGONFLY) && \
5391	KMP_AFFINITY_SUPPORTED
5392	/ Temporarily set full mask for primary thread before creation of*
5393	workers. The reason is that workers inherit the affinity from the
5394	primary thread, so if a lot of workers are created on the single
5395	core quickly, they don't get a chance to set their own affinity for
5396	a long time. /*
5397	kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5398	#endif
5399
5400	/ allocate new threads for the hot team /
5401	for (f = team->t.t_nproc; f < new_nproc; f++) {
5402	kmp_info_t *new_worker = __kmp_allocate_thread(root, team, new_tid: f);
5403	KMP_DEBUG_ASSERT(new_worker);
5404	team->t.t_threads[f] = new_worker;
5405
5406	KA_TRACE(`20`,
5407	("__kmp_allocate_team: team %d init T#%d arrived: "
5408	"join=%llu, plain=%llu\n",
5409	team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5410	team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5411	team->t.t_bar[bs_plain_barrier].b_arrived));
5412
5413	{ // Initialize barrier data for new threads.
5414	int b;
5415	kmp_balign_t *balign = new_worker->th.th_bar;
5416	for (b = `0`; b < bs_last_barrier; ++b) {
5417	balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5418	KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5419	KMP_BARRIER_PARENT_FLAG);
5420	#if USE_DEBUGGER
5421	balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5422	#endif
5423	}
5424	}
5425	}
5426
5427	#if (KMP_OS_LINUX \|\| KMP_OS_FREEBSD \|\| KMP_OS_NETBSD \|\| KMP_OS_DRAGONFLY) && \
5428	KMP_AFFINITY_SUPPORTED
5429	/ Restore initial primary thread's affinity mask /
5430	new_temp_affinity.restore();
5431	#endif
5432	#if KMP_NESTED_HOT_TEAMS
5433	} // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5434	#endif // KMP_NESTED_HOT_TEAMS
5435	if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5436	// Barrier size already increased earlier in this function
5437	// Activate team threads via th_used_in_team
5438	__kmp_add_threads_to_team(team, new_nthreads: new_nproc);
5439	}
5440	/ make sure everyone is syncronized /
5441	// new threads below
5442	__kmp_initialize_team(team, new_nproc, new_icvs,
5443	loc: root->r.r_uber_thread->th.th_ident);
5444
5445	/ reinitialize the threads /
5446	KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5447	for (f = `0`; f < team->t.t_nproc; ++f)
5448	__kmp_initialize_info(this_thr: team->t.t_threads[f], team, tid: f,
5449	gtid: __kmp_gtid_from_tid(tid: f, team));
5450
5451	// set th_task_state for new threads in hot team with older thread's state
5452	kmp_uint8 old_state = team->t.t_threads[old_nproc - `1`]->th.th_task_state;
5453	for (f = old_nproc; f < team->t.t_nproc; ++f)
5454	team->t.t_threads[f]->th.th_task_state = old_state;
5455
5456	#ifdef KMP_DEBUG
5457	for (f = `0`; f < team->t.t_nproc; ++f) {
5458	KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5459	team->t.t_threads[f]->th.th_team_nproc ==
5460	team->t.t_nproc);
5461	}
5462	#endif
5463
5464	if (do_place_partition) {
5465	KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5466	#if KMP_AFFINITY_SUPPORTED
5467	__kmp_partition_places(team);
5468	#endif
5469	}
5470	} // Check changes in number of threads
5471
5472	if (master->th.th_teams_microtask) {
5473	for (f = `1`; f < new_nproc; ++f) {
5474	// propagate teams construct specific info to workers
5475	kmp_info_t *thr = team->t.t_threads[f];
5476	thr->th.th_teams_microtask = master->th.th_teams_microtask;
5477	thr->th.th_teams_level = master->th.th_teams_level;
5478	thr->th.th_teams_size = master->th.th_teams_size;
5479	}
5480	}
5481	#if KMP_NESTED_HOT_TEAMS
5482	if (level) {
5483	// Sync barrier state for nested hot teams, not needed for outermost hot
5484	// team.
5485	for (f = `1`; f < new_nproc; ++f) {
5486	kmp_info_t *thr = team->t.t_threads[f];
5487	int b;
5488	kmp_balign_t *balign = thr->th.th_bar;
5489	for (b = `0`; b < bs_last_barrier; ++b) {
5490	balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5491	KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5492	#if USE_DEBUGGER
5493	balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5494	#endif
5495	}
5496	}
5497	}
5498	#endif // KMP_NESTED_HOT_TEAMS
5499
5500	/ reallocate space for arguments if necessary /
5501	__kmp_alloc_argv_entries(argc, team, TRUE);
5502	KMP_CHECK_UPDATE(team->t.t_argc, argc);
5503	// The hot team re-uses the previous task team,
5504	// if untouched during the previous release->gather phase.
5505
5506	KF_TRACE(`10`, (" hot_team = %p\n", team));
5507
5508	#if KMP_DEBUG
5509	if (__kmp_tasking_mode != tskm_immediate_exec) {
5510	KA_TRACE(`20`, ("__kmp_allocate_team: hot team task_team[0] = %p "
5511	"task_team[1] = %p after reinit\n",
5512	team->t.t_task_team[`0`], team->t.t_task_team[`1`]));
5513	}
5514	#endif
5515
5516	#if OMPT_SUPPORT
5517	__ompt_team_assign_id(team, ompt_pid: ompt_parallel_data);
5518	#endif
5519
5520	KMP_MB();
5521
5522	return team;
5523	}
5524
5525	/ next, let's try to take one from the team pool /
5526	KMP_MB();
5527	for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5528	/ TODO: consider resizing undersized teams instead of reaping them, now*
5529	that we have a resizing mechanism /*
5530	if (team->t.t_max_nproc >= max_nproc) {
5531	/ take this team from the team pool /
5532	__kmp_team_pool = team->t.t_next_pool;
5533
5534	if (max_nproc > `1` &&
5535	__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5536	if (!team->t.b) { // Allocate barrier structure
5537	team->t.b = distributedBarrier::allocate(nThreads: __kmp_dflt_team_nth_ub);
5538	}
5539	}
5540
5541	/ setup the team for fresh use /
5542	__kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5543
5544	KA_TRACE(`20`, ("__kmp_allocate_team: setting task_team[0] %p and "
5545	"task_team[1] %p to NULL\n",
5546	&team->t.t_task_team[`0`], &team->t.t_task_team[`1`]));
5547	team->t.t_task_team[`0`] = NULL;
5548	team->t.t_task_team[`1`] = NULL;
5549
5550	/ reallocate space for arguments if necessary /
5551	__kmp_alloc_argv_entries(argc, team, TRUE);
5552	KMP_CHECK_UPDATE(team->t.t_argc, argc);
5553
5554	KA_TRACE(
5555	`20`, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5556	team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5557	{ // Initialize barrier data.
5558	int b;
5559	for (b = `0`; b < bs_last_barrier; ++b) {
5560	team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5561	#if USE_DEBUGGER
5562	team->t.t_bar[b].b_master_arrived = `0`;
5563	team->t.t_bar[b].b_team_arrived = `0`;
5564	#endif
5565	}
5566	}
5567
5568	team->t.t_proc_bind = new_proc_bind;
5569
5570	KA_TRACE(`20`, ("__kmp_allocate_team: using team from pool %d.\n",
5571	team->t.t_id));
5572
5573	#if OMPT_SUPPORT
5574	__ompt_team_assign_id(team, ompt_pid: ompt_parallel_data);
5575	#endif
5576
5577	team->t.t_nested_nth = NULL;
5578
5579	KMP_MB();
5580
5581	return team;
5582	}
5583
5584	/ reap team if it is too small, then loop back and check the next one /
5585	// not sure if this is wise, but, will be redone during the hot-teams
5586	// rewrite.
5587	/ TODO: Use technique to find the right size hot-team, don't reap them /
5588	team = __kmp_reap_team(team);
5589	__kmp_team_pool = team;
5590	}
5591
5592	/ nothing available in the pool, no matter, make a new team! /
5593	KMP_MB();
5594	team = (kmp_team_t )__kmp_allocate(sizeof*(kmp_team_t));
5595
5596	/ and set it up /
5597	team->t.t_max_nproc = max_nproc;
5598	if (max_nproc > `1` &&
5599	__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5600	// Allocate barrier structure
5601	team->t.b = distributedBarrier::allocate(nThreads: __kmp_dflt_team_nth_ub);
5602	}
5603
5604	/ NOTE well, for some reason allocating one big buffer and dividing it up*
5605	seems to really hurt performance a lot on the P4, so, let's not use this /*
5606	__kmp_allocate_team_arrays(team, max_nth: max_nproc);
5607
5608	KA_TRACE(`20`, ("__kmp_allocate_team: making a new team\n"));
5609	__kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5610
5611	KA_TRACE(`20`, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5612	"%p to NULL\n",
5613	&team->t.t_task_team[`0`], &team->t.t_task_team[`1`]));
5614	team->t.t_task_team[`0`] = NULL; // to be removed, as __kmp_allocate zeroes
5615	// memory, no need to duplicate
5616	team->t.t_task_team[`1`] = NULL; // to be removed, as __kmp_allocate zeroes
5617	// memory, no need to duplicate
5618
5619	if (__kmp_storage_map) {
5620	__kmp_print_team_storage_map(header: "team", team, team_id: team->t.t_id, num_thr: new_nproc);
5621	}
5622
5623	/ allocate space for arguments /
5624	__kmp_alloc_argv_entries(argc, team, FALSE);
5625	team->t.t_argc = argc;
5626
5627	KA_TRACE(`20`,
5628	("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5629	team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5630	{ // Initialize barrier data.
5631	int b;
5632	for (b = `0`; b < bs_last_barrier; ++b) {
5633	team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5634	#if USE_DEBUGGER
5635	team->t.t_bar[b].b_master_arrived = `0`;
5636	team->t.t_bar[b].b_team_arrived = `0`;
5637	#endif
5638	}
5639	}
5640
5641	team->t.t_proc_bind = new_proc_bind;
5642
5643	#if OMPT_SUPPORT
5644	__ompt_team_assign_id(team, ompt_pid: ompt_parallel_data);
5645	team->t.ompt_serialized_team_info = NULL;
5646	#endif
5647
5648	KMP_MB();
5649
5650	team->t.t_nested_nth = NULL;
5651
5652	KA_TRACE(`20`, ("__kmp_allocate_team: done creating a new team %d.\n",
5653	team->t.t_id));
5654
5655	return team;
5656	}
5657
5658	/ TODO implement hot-teams at all levels /
5659	/ TODO implement lazy thread release on demand (disband request) /
5660
5661	/ free the team. return it to the team pool. release all the threads*
5662	* associated with it */
5663	void __kmp_free_team(kmp_root_t *root,
5664	kmp_team_t team USE_NESTED_HOT_ARG(kmp_info_t master)) {
5665	int f;
5666	KA_TRACE(`20`, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5667	team->t.t_id));
5668
5669	/ verify state /
5670	KMP_DEBUG_ASSERT(root);
5671	KMP_DEBUG_ASSERT(team);
5672	KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5673	KMP_DEBUG_ASSERT(team->t.t_threads);
5674
5675	int use_hot_team = team == root->r.r_hot_team;
5676	#if KMP_NESTED_HOT_TEAMS
5677	int level;
5678	if (master) {
5679	level = team->t.t_active_level - `1`;
5680	if (master->th.th_teams_microtask) { // in teams construct?
5681	if (master->th.th_teams_size.nteams > `1`) {
5682	++level; // level was not increased in teams construct for
5683	// team_of_masters
5684	}
5685	if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5686	master->th.th_teams_level == team->t.t_level) {
5687	++level; // level was not increased in teams construct for
5688	// team_of_workers before the parallel
5689	} // team->t.t_level will be increased inside parallel
5690	}
5691	#if KMP_DEBUG
5692	kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5693	#endif
5694	if (level < __kmp_hot_teams_max_level) {
5695	KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5696	use_hot_team = `1`;
5697	}
5698	}
5699	#endif // KMP_NESTED_HOT_TEAMS
5700
5701	/ team is done working /
5702	TCW_SYNC_PTR(team->t.t_pkfn,
5703	NULL); // Important for Debugging Support Library.
5704	#if KMP_OS_WINDOWS
5705	team->t.t_copyin_counter = `0`; // init counter for possible reuse
5706	#endif
5707	// Do not reset pointer to parent team to NULL for hot teams.
5708
5709	/ if we are non-hot team, release our threads /
5710	if (!use_hot_team) {
5711	if (__kmp_tasking_mode != tskm_immediate_exec) {
5712	// Wait for threads to reach reapable state
5713	for (f = `1`; f < team->t.t_nproc; ++f) {
5714	KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5715	kmp_info_t *th = team->t.t_threads[f];
5716	volatile kmp_uint32 *state = &th->th.th_reap_state;
5717	while (*state != KMP_SAFE_TO_REAP) {
5718	#if KMP_OS_WINDOWS
5719	// On Windows a thread can be killed at any time, check this
5720	DWORD ecode;
5721	if (!__kmp_is_thread_alive(th, &ecode)) {
5722	state = KMP_SAFE_TO_REAP; // reset the flag for dead thread*
5723	break;
5724	}
5725	#endif
5726	// first check if thread is sleeping
5727	if (th->th.th_sleep_loc)
5728	__kmp_null_resume_wrapper(thr: th);
5729	KMP_CPU_PAUSE();
5730	}
5731	}
5732
5733	// Delete task teams
5734	int tt_idx;
5735	for (tt_idx = `0`; tt_idx < `2`; ++tt_idx) {
5736	kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5737	if (task_team != NULL) {
5738	for (f = `0`; f < team->t.t_nproc; ++f) { // threads unref task teams
5739	KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5740	team->t.t_threads[f]->th.th_task_team = NULL;
5741	}
5742	KA_TRACE(
5743	`20`,
5744	("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5745	__kmp_get_gtid(), task_team, team->t.t_id));
5746	#if KMP_NESTED_HOT_TEAMS
5747	__kmp_free_task_team(thread: master, task_team);
5748	#endif
5749	team->t.t_task_team[tt_idx] = NULL;
5750	}
5751	}
5752	}
5753
5754	// Before clearing parent pointer, check if nested_nth list should be freed
5755	if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
5756	team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
5757	KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
5758	KMP_INTERNAL_FREE(team->t.t_nested_nth);
5759	}
5760	team->t.t_nested_nth = NULL;
5761
5762	// Reset pointer to parent team only for non-hot teams.
5763	team->t.t_parent = NULL;
5764	team->t.t_level = `0`;
5765	team->t.t_active_level = `0`;
5766
5767	/ free the worker threads /
5768	for (f = `1`; f < team->t.t_nproc; ++f) {
5769	KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5770	if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5771	(void)KMP_COMPARE_AND_STORE_ACQ32(
5772	&(team->t.t_threads[f]->th.th_used_in_team), `1`, `2`);
5773	}
5774	__kmp_free_thread(team->t.t_threads[f]);
5775	}
5776
5777	if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5778	if (team->t.b) {
5779	// wake up thread at old location
5780	team->t.b->go_release();
5781	if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5782	for (f = `1`; f < team->t.t_nproc; ++f) {
5783	if (team->t.b->sleep[f].sleep) {
5784	__kmp_atomic_resume_64(
5785	target_gtid: team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5786	flag: (kmp_atomic_flag_64<> *)NULL);
5787	}
5788	}
5789	}
5790	// Wait for threads to be removed from team
5791	for (int f = `1`; f < team->t.t_nproc; ++f) {
5792	while (team->t.t_threads[f]->th.th_used_in_team.load() != `0`)
5793	KMP_CPU_PAUSE();
5794	}
5795	}
5796	}
5797
5798	for (f = `1`; f < team->t.t_nproc; ++f) {
5799	team->t.t_threads[f] = NULL;
5800	}
5801
5802	if (team->t.t_max_nproc > `1` &&
5803	__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5804	distributedBarrier::deallocate(db: team->t.b);
5805	team->t.b = NULL;
5806	}
5807	/ put the team back in the team pool /
5808	/ TODO limit size of team pool, call reap_team if pool too large /
5809	team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5810	__kmp_team_pool = (volatile kmp_team_t *)team;
5811	} else { // Check if team was created for primary threads in teams construct
5812	// See if first worker is a CG root
5813	KMP_DEBUG_ASSERT(team->t.t_threads[`1`] &&
5814	team->t.t_threads[`1`]->th.th_cg_roots);
5815	if (team->t.t_threads[`1`]->th.th_cg_roots->cg_root == team->t.t_threads[`1`]) {
5816	// Clean up the CG root nodes on workers so that this team can be re-used
5817	for (f = `1`; f < team->t.t_nproc; ++f) {
5818	kmp_info_t *thr = team->t.t_threads[f];
5819	KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5820	thr->th.th_cg_roots->cg_root == thr);
5821	// Pop current CG root off list
5822	kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5823	thr->th.th_cg_roots = tmp->up;
5824	KA_TRACE(`100`, ("__kmp_free_team: Thread %p popping node %p and moving"
5825	" up to node %p. cg_nthreads was %d\n",
5826	thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5827	int i = tmp->cg_nthreads--;
5828	if (i == `1`) {
5829	__kmp_free(tmp); // free CG if we are the last thread in it
5830	}
5831	// Restore current task's thread_limit from CG root
5832	if (thr->th.th_cg_roots)
5833	thr->th.th_current_task->td_icvs.thread_limit =
5834	thr->th.th_cg_roots->cg_thread_limit;
5835	}
5836	}
5837	}
5838
5839	KMP_MB();
5840	}
5841
5842	/ reap the team. destroy it, reclaim all its resources and free its memory /
5843	kmp_team_t __kmp_reap_team(kmp_team_t team) {
5844	kmp_team_t *next_pool = team->t.t_next_pool;
5845
5846	KMP_DEBUG_ASSERT(team);
5847	KMP_DEBUG_ASSERT(team->t.t_dispatch);
5848	KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5849	KMP_DEBUG_ASSERT(team->t.t_threads);
5850	KMP_DEBUG_ASSERT(team->t.t_argv);
5851
5852	/ TODO clean the threads that are a part of this? /
5853
5854	/ free stuff /
5855	__kmp_free_team_arrays(team);
5856	if (team->t.t_argv != &team->t.t_inline_argv[`0`])
5857	__kmp_free((void *)team->t.t_argv);
5858	__kmp_free(team);
5859
5860	KMP_MB();
5861	return next_pool;
5862	}
5863
5864	// Free the thread. Don't reap it, just place it on the pool of available
5865	// threads.
5866	//
5867	// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5868	// binding for the affinity mechanism to be useful.
5869	//
5870	// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5871	// However, we want to avoid a potential performance problem by always
5872	// scanning through the list to find the correct point at which to insert
5873	// the thread (potential N2 behavior). To do this we keep track of the
5874	// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5875	// With single-level parallelism, threads will always be added to the tail
5876	// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5877	// parallelism, all bets are off and we may need to scan through the entire
5878	// free list.
5879	//
5880	// This change also has a potentially large performance benefit, for some
5881	// applications. Previously, as threads were freed from the hot team, they
5882	// would be placed back on the free list in inverse order. If the hot team
5883	// grew back to it's original size, then the freed thread would be placed
5884	// back on the hot team in reverse order. This could cause bad cache
5885	// locality problems on programs where the size of the hot team regularly
5886	// grew and shrunk.
5887	//
5888	// Now, for single-level parallelism, the OMP tid is always == gtid.
5889	void __kmp_free_thread(kmp_info_t *this_th) {
5890	int gtid;
5891	kmp_info_t **scan;
5892
5893	KA_TRACE(`20`, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5894	__kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5895
5896	KMP_DEBUG_ASSERT(this_th);
5897
5898	// When moving thread to pool, switch thread to wait on own b_go flag, and
5899	// uninitialized (NULL team).
5900	int b;
5901	kmp_balign_t *balign = this_th->th.th_bar;
5902	for (b = `0`; b < bs_last_barrier; ++b) {
5903	if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5904	balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5905	balign[b].bb.team = NULL;
5906	balign[b].bb.leaf_kids = `0`;
5907	}
5908	this_th->th.th_task_state = `0`;
5909	this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5910
5911	/ put thread back on the free pool /
5912	TCW_PTR(this_th->th.th_team, NULL);
5913	TCW_PTR(this_th->th.th_root, NULL);
5914	TCW_PTR(this_th->th.th_dispatch, NULL); / NOT NEEDED /
5915
5916	while (this_th->th.th_cg_roots) {
5917	this_th->th.th_cg_roots->cg_nthreads--;
5918	KA_TRACE(`100`, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5919	" %p of thread %p to %d\n",
5920	this_th, this_th->th.th_cg_roots,
5921	this_th->th.th_cg_roots->cg_root,
5922	this_th->th.th_cg_roots->cg_nthreads));
5923	kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5924	if (tmp->cg_root == this_th) { // Thread is a cg_root
5925	KMP_DEBUG_ASSERT(tmp->cg_nthreads == `0`);
5926	KA_TRACE(
5927	`5`, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5928	this_th->th.th_cg_roots = tmp->up;
5929	__kmp_free(tmp);
5930	} else { // Worker thread
5931	if (tmp->cg_nthreads == `0`) { // last thread leaves contention group
5932	__kmp_free(tmp);
5933	}
5934	this_th->th.th_cg_roots = NULL;
5935	break;
5936	}
5937	}
5938
5939	/ If the implicit task assigned to this thread can be used by other threads*
5940	* -> multiple threads can share the data and try to free the task at
5941	* __kmp_reap_thread at exit. This duplicate use of the task data can happen
5942	* with higher probability when hot team is disabled but can occurs even when
5943	* the hot team is enabled */
5944	__kmp_free_implicit_task(this_thr: this_th);
5945	this_th->th.th_current_task = NULL;
5946
5947	// If the __kmp_thread_pool_insert_pt is already past the new insert
5948	// point, then we need to re-scan the entire list.
5949	gtid = this_th->th.th_info.ds.ds_gtid;
5950	if (__kmp_thread_pool_insert_pt != NULL) {
5951	KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5952	if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5953	__kmp_thread_pool_insert_pt = NULL;
5954	}
5955	}
5956
5957	// Scan down the list to find the place to insert the thread.
5958	// scan is the address of a link in the list, possibly the address of
5959	// __kmp_thread_pool itself.
5960	//
5961	// In the absence of nested parallelism, the for loop will have 0 iterations.
5962	if (__kmp_thread_pool_insert_pt != NULL) {
5963	scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5964	} else {
5965	scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5966	}
5967	for (; (scan != NULL) && ((scan)->th.th_info.ds.ds_gtid < gtid);
5968	scan = &((*scan)->th.th_next_pool))
5969	;
5970
5971	// Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5972	// to its address.
5973	TCW_PTR(this_th->th.th_next_pool, *scan);
5974	__kmp_thread_pool_insert_pt = *scan = this_th;
5975	KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) \|\|
5976	(this_th->th.th_info.ds.ds_gtid <
5977	this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5978	TCW_4(this_th->th.th_in_pool, TRUE);
5979	__kmp_suspend_initialize_thread(th: this_th);
5980	__kmp_lock_suspend_mx(th: this_th);
5981	if (this_th->th.th_active == TRUE) {
5982	KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5983	this_th->th.th_active_in_pool = TRUE;
5984	}
5985	#if KMP_DEBUG
5986	else {
5987	KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5988	}
5989	#endif
5990	__kmp_unlock_suspend_mx(th: this_th);
5991
5992	TCW_4(__kmp_nth, __kmp_nth - `1`);
5993
5994	#ifdef KMP_ADJUST_BLOCKTIME
5995	/ Adjust blocktime back to user setting or default if necessary /
5996	/ Middle initialization might never have occurred /
5997	if (!__kmp_env_blocktime && (__kmp_avail_proc > `0`)) {
5998	KMP_DEBUG_ASSERT(__kmp_avail_proc > `0`);
5999	if (__kmp_nth <= __kmp_avail_proc) {
6000	__kmp_zero_bt = FALSE;
6001	}
6002	}
6003	#endif /* KMP_ADJUST_BLOCKTIME */
6004
6005	KMP_MB();
6006	}
6007
6008	/ ------------------------------------------------------------------------ /
6009
6010	void __kmp_launch_thread(kmp_info_t this_thr) {
6011	#if OMP_PROFILING_SUPPORT
6012	ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
6013	// TODO: add a configuration option for time granularity
6014	if (ProfileTraceFile)
6015	llvm::timeTraceProfilerInitialize(`500` / us /, "libomptarget");
6016	#endif
6017
6018	int gtid = this_thr->th.th_info.ds.ds_gtid;
6019	/ void stack_data;/*
6020	kmp_team_t **volatile pteam;
6021
6022	KMP_MB();
6023	KA_TRACE(`10`, ("__kmp_launch_thread: T#%d start\n", gtid));
6024
6025	if (__kmp_env_consistency_check) {
6026	this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
6027	}
6028
6029	#if OMPD_SUPPORT
6030	if (ompd_state & OMPD_ENABLE_BP)
6031	ompd_bp_thread_begin();
6032	#endif
6033
6034	#if OMPT_SUPPORT
6035	ompt_data_t thread_data = nullptr*;
6036	if (ompt_enabled.enabled) {
6037	thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6038	*thread_data = ompt_data_none;
6039
6040	this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6041	this_thr->th.ompt_thread_info.wait_id = `0`;
6042	this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(`0`);
6043	this_thr->th.ompt_thread_info.parallel_flags = `0`;
6044	if (ompt_enabled.ompt_callback_thread_begin) {
6045	ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6046	ompt_thread_worker, thread_data);
6047	}
6048	this_thr->th.ompt_thread_info.state = ompt_state_idle;
6049	}
6050	#endif
6051
6052	/ This is the place where threads wait for work /
6053	while (!TCR_4(__kmp_global.g.g_done)) {
6054	KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6055	KMP_MB();
6056
6057	/ wait for work to do /
6058	KA_TRACE(`20`, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
6059
6060	/ No tid yet since not part of a team /
6061	__kmp_fork_barrier(gtid, KMP_GTID_DNE);
6062
6063	#if OMPT_SUPPORT
6064	if (ompt_enabled.enabled) {
6065	this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6066	}
6067	#endif
6068
6069	pteam = &this_thr->th.th_team;
6070
6071	/ have we been allocated? /
6072	if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6073	/ we were just woken up, so run our new task /
6074	if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6075	int rc;
6076	KA_TRACE(`20`,
6077	("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6078	gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6079	(*pteam)->t.t_pkfn));
6080
6081	updateHWFPControl(team: *pteam);
6082
6083	#if OMPT_SUPPORT
6084	if (ompt_enabled.enabled) {
6085	this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6086	}
6087	#endif
6088
6089	rc = (*pteam)->t.t_invoke(gtid);
6090	KMP_ASSERT(rc);
6091
6092	KMP_MB();
6093	KA_TRACE(`20`, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6094	gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6095	(*pteam)->t.t_pkfn));
6096	}
6097	#if OMPT_SUPPORT
6098	if (ompt_enabled.enabled) {
6099	/ no frame set while outside task /
6100	__ompt_get_task_info_object(depth: `0`)->frame.exit_frame = ompt_data_none;
6101
6102	this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6103	}
6104	#endif
6105	/ join barrier after parallel region /
6106	__kmp_join_barrier(gtid);
6107	}
6108	}
6109
6110	#if OMPD_SUPPORT
6111	if (ompd_state & OMPD_ENABLE_BP)
6112	ompd_bp_thread_end();
6113	#endif
6114
6115	#if OMPT_SUPPORT
6116	if (ompt_enabled.ompt_callback_thread_end) {
6117	ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6118	}
6119	#endif
6120
6121	this_thr->th.th_task_team = NULL;
6122	/ run the destructors for the threadprivate data for this thread /
6123	__kmp_common_destroy_gtid(gtid);
6124
6125	KA_TRACE(`10`, ("__kmp_launch_thread: T#%d done\n", gtid));
6126	KMP_MB();
6127
6128	#if OMP_PROFILING_SUPPORT
6129	llvm::timeTraceProfilerFinishThread();
6130	#endif
6131	return this_thr;
6132	}
6133
6134	/ ------------------------------------------------------------------------ /
6135
6136	void __kmp_internal_end_dest(void *specific_gtid) {
6137	// Make sure no significant bits are lost
6138	int gtid;
6139	__kmp_type_convert(src: (kmp_intptr_t)specific_gtid - `1`, dest: &gtid);
6140
6141	KA_TRACE(`30`, ("__kmp_internal_end_dest: T#%d\n", gtid));
6142	/ NOTE: the gtid is stored as gitd+1 in the thread-local-storage*
6143	* this is because 0 is reserved for the nothing-stored case */
6144
6145	__kmp_internal_end_thread(gtid);
6146	}
6147
6148	#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6149
6150	__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6151	__kmp_internal_end_atexit();
6152	}
6153
6154	#endif
6155
6156	/ [Windows] josh: when the atexit handler is called, there may still be more*
6157	than one thread alive /*
6158	void __kmp_internal_end_atexit(void) {
6159	KA_TRACE(`30`, ("__kmp_internal_end_atexit\n"));
6160	/ [Windows]*
6161	josh: ideally, we want to completely shutdown the library in this atexit
6162	handler, but stat code that depends on thread specific data for gtid fails
6163	because that data becomes unavailable at some point during the shutdown, so
6164	we call __kmp_internal_end_thread instead. We should eventually remove the
6165	dependency on __kmp_get_specific_gtid in the stat code and use
6166	__kmp_internal_end_library to cleanly shutdown the library.
6167
6168	// TODO: Can some of this comment about GVS be removed?
6169	I suspect that the offending stat code is executed when the calling thread
6170	tries to clean up a dead root thread's data structures, resulting in GVS
6171	code trying to close the GVS structures for that thread, but since the stat
6172	code uses __kmp_get_specific_gtid to get the gtid with the assumption that
6173	the calling thread is cleaning up itself instead of another thread, it get
6174	confused. This happens because allowing a thread to unregister and cleanup
6175	another thread is a recent modification for addressing an issue.
6176	Based on the current design (20050722), a thread may end up
6177	trying to unregister another thread only if thread death does not trigger
6178	the calling of __kmp_internal_end_thread. For Linux OS, there is the*
6179	thread specific data destructor function to detect thread death. For
6180	Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
6181	is nothing. Thus, the workaround is applicable only for Windows static
6182	stat library. /*
6183	__kmp_internal_end_library(gtid: -`1`);
6184	#if KMP_OS_WINDOWS
6185	__kmp_close_console();
6186	#endif
6187	}
6188
6189	static void __kmp_reap_thread(kmp_info_t thread, int* is_root) {
6190	// It is assumed __kmp_forkjoin_lock is acquired.
6191
6192	int gtid;
6193
6194	KMP_DEBUG_ASSERT(thread != NULL);
6195
6196	gtid = thread->th.th_info.ds.ds_gtid;
6197
6198	if (!is_root) {
6199	if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6200	/ Assume the threads are at the fork barrier here /
6201	KA_TRACE(
6202	`20`, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6203	gtid));
6204	if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6205	while (
6206	!KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), `0`, `3`))
6207	KMP_CPU_PAUSE();
6208	__kmp_resume_32(target_gtid: gtid, flag: (kmp_flag_32<false, false> *)NULL);
6209	} else {
6210	/ Need release fence here to prevent seg faults for tree forkjoin*
6211	barrier (GEH) /*
6212	kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6213	thread);
6214	__kmp_release_64(flag: &flag);
6215	}
6216	}
6217
6218	// Terminate OS thread.
6219	__kmp_reap_worker(th: thread);
6220
6221	// The thread was killed asynchronously. If it was actively
6222	// spinning in the thread pool, decrement the global count.
6223	//
6224	// There is a small timing hole here - if the worker thread was just waking
6225	// up after sleeping in the pool, had reset it's th_active_in_pool flag but
6226	// not decremented the global counter __kmp_thread_pool_active_nth yet, then
6227	// the global counter might not get updated.
6228	//
6229	// Currently, this can only happen as the library is unloaded,
6230	// so there are no harmful side effects.
6231	if (thread->th.th_active_in_pool) {
6232	thread->th.th_active_in_pool = FALSE;
6233	KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6234	KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= `0`);
6235	}
6236	}
6237
6238	__kmp_free_implicit_task(this_thr: thread);
6239
6240	// Free the fast memory for tasking
6241	#if USE_FAST_MEMORY
6242	__kmp_free_fast_memory(this_thr: thread);
6243	#endif /* USE_FAST_MEMORY */
6244
6245	__kmp_suspend_uninitialize_thread(th: thread);
6246
6247	KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6248	TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6249
6250	--__kmp_all_nth;
6251	// __kmp_nth was decremented when thread is added to the pool.
6252
6253	#ifdef KMP_ADJUST_BLOCKTIME
6254	/ Adjust blocktime back to user setting or default if necessary /
6255	/ Middle initialization might never have occurred /
6256	if (!__kmp_env_blocktime && (__kmp_avail_proc > `0`)) {
6257	KMP_DEBUG_ASSERT(__kmp_avail_proc > `0`);
6258	if (__kmp_nth <= __kmp_avail_proc) {
6259	__kmp_zero_bt = FALSE;
6260	}
6261	}
6262	#endif /* KMP_ADJUST_BLOCKTIME */
6263
6264	/ free the memory being used /
6265	if (__kmp_env_consistency_check) {
6266	if (thread->th.th_cons) {
6267	__kmp_free_cons_stack(ptr: thread->th.th_cons);
6268	thread->th.th_cons = NULL;
6269	}
6270	}
6271
6272	if (thread->th.th_pri_common != NULL) {
6273	__kmp_free(thread->th.th_pri_common);
6274	thread->th.th_pri_common = NULL;
6275	}
6276
6277	#if KMP_USE_BGET
6278	if (thread->th.th_local.bget_data != NULL) {
6279	__kmp_finalize_bget(th: thread);
6280	}
6281	#endif
6282
6283	#if KMP_AFFINITY_SUPPORTED
6284	if (thread->th.th_affin_mask != NULL) {
6285	KMP_CPU_FREE(thread->th.th_affin_mask);
6286	thread->th.th_affin_mask = NULL;
6287	}
6288	#endif /* KMP_AFFINITY_SUPPORTED */
6289
6290	#if KMP_USE_HIER_SCHED
6291	if (thread->th.th_hier_bar_data != NULL) {
6292	__kmp_free(thread->th.th_hier_bar_data);
6293	thread->th.th_hier_bar_data = NULL;
6294	}
6295	#endif
6296
6297	__kmp_reap_team(team: thread->th.th_serial_team);
6298	thread->th.th_serial_team = NULL;
6299	__kmp_free(thread);
6300
6301	KMP_MB();
6302
6303	} // __kmp_reap_thread
6304
6305	static void __kmp_itthash_clean(kmp_info_t *th) {
6306	#if USE_ITT_NOTIFY
6307	if (__kmp_itt_region_domains.count > `0`) {
6308	for (int i = `0`; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6309	kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6310	while (bucket) {
6311	kmp_itthash_entry_t *next = bucket->next_in_bucket;
6312	__kmp_thread_free(th, bucket);
6313	bucket = next;
6314	}
6315	}
6316	}
6317	if (__kmp_itt_barrier_domains.count > `0`) {
6318	for (int i = `0`; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6319	kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6320	while (bucket) {
6321	kmp_itthash_entry_t *next = bucket->next_in_bucket;
6322	__kmp_thread_free(th, bucket);
6323	bucket = next;
6324	}
6325	}
6326	}
6327	#endif
6328	}
6329
6330	static void __kmp_internal_end(void) {
6331	int i;
6332
6333	/ First, unregister the library /
6334	__kmp_unregister_library();
6335
6336	#if KMP_OS_WINDOWS
6337	/ In Win static library, we can't tell when a root actually dies, so we*
6338	reclaim the data structures for any root threads that have died but not
6339	unregistered themselves, in order to shut down cleanly.
6340	In Win dynamic library we also can't tell when a thread dies. /*
6341	__kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6342	// dead roots
6343	#endif
6344
6345	for (i = `0`; i < __kmp_threads_capacity; i++)
6346	if (__kmp_root[i])
6347	if (__kmp_root[i]->r.r_active)
6348	break;
6349	KMP_MB(); / Flush all pending memory write invalidates. /
6350	TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6351
6352	if (i < __kmp_threads_capacity) {
6353	#if KMP_USE_MONITOR
6354	// 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6355	KMP_MB(); / Flush all pending memory write invalidates. /
6356
6357	// Need to check that monitor was initialized before reaping it. If we are
6358	// called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6359	// __kmp_monitor will appear to contain valid data, but it is only valid in
6360	// the parent process, not the child.
6361	// New behavior (201008): instead of keying off of the flag
6362	// __kmp_init_parallel, the monitor thread creation is keyed off
6363	// of the new flag __kmp_init_monitor.
6364	__kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6365	if (TCR_4(__kmp_init_monitor)) {
6366	__kmp_reap_monitor(&__kmp_monitor);
6367	TCW_4(__kmp_init_monitor, `0`);
6368	}
6369	__kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6370	KA_TRACE(`10`, ("__kmp_internal_end: monitor reaped\n"));
6371	#endif // KMP_USE_MONITOR
6372	} else {
6373	/ TODO move this to cleanup code /
6374	#ifdef KMP_DEBUG
6375	/ make sure that everything has properly ended /
6376	for (i = `0`; i < __kmp_threads_capacity; i++) {
6377	if (__kmp_root[i]) {
6378	// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6379	// there can be uber threads alive here
6380	KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
6381	}
6382	}
6383	#endif
6384
6385	KMP_MB();
6386
6387	// Reap the worker threads.
6388	// This is valid for now, but be careful if threads are reaped sooner.
6389	while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool.
6390	// Get the next thread from the pool.
6391	kmp_info_t thread = CCAST(kmp_info_t , __kmp_thread_pool);
6392	__kmp_thread_pool = thread->th.th_next_pool;
6393	// Reap it.
6394	KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6395	thread->th.th_next_pool = NULL;
6396	thread->th.th_in_pool = FALSE;
6397	__kmp_reap_thread(thread, is_root: `0`);
6398	}
6399	__kmp_thread_pool_insert_pt = NULL;
6400
6401	// Reap teams.
6402	while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
6403	// Get the next team from the pool.
6404	kmp_team_t team = CCAST(kmp_team_t , __kmp_team_pool);
6405	__kmp_team_pool = team->t.t_next_pool;
6406	// Reap it.
6407	team->t.t_next_pool = NULL;
6408	__kmp_reap_team(team);
6409	}
6410
6411	__kmp_reap_task_teams();
6412
6413	#if KMP_OS_UNIX
6414	// Threads that are not reaped should not access any resources since they
6415	// are going to be deallocated soon, so the shutdown sequence should wait
6416	// until all threads either exit the final spin-waiting loop or begin
6417	// sleeping after the given blocktime.
6418	for (i = `0`; i < __kmp_threads_capacity; i++) {
6419	kmp_info_t *thr = __kmp_threads[i];
6420	while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6421	KMP_CPU_PAUSE();
6422	}
6423	#endif
6424
6425	for (i = `0`; i < __kmp_threads_capacity; ++i) {
6426	// TBD: Add some checking...
6427	// Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6428	}
6429
6430	/ Make sure all threadprivate destructors get run by joining with all*
6431	worker threads before resetting this flag /*
6432	TCW_SYNC_4(__kmp_init_common, FALSE);
6433
6434	KA_TRACE(`10`, ("__kmp_internal_end: all workers reaped\n"));
6435	KMP_MB();
6436
6437	#if KMP_USE_MONITOR
6438	// See note above: One of the possible fixes for CQ138434 / CQ140126
6439	//
6440	// FIXME: push both code fragments down and CSE them?
6441	// push them into __kmp_cleanup() ?
6442	__kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6443	if (TCR_4(__kmp_init_monitor)) {
6444	__kmp_reap_monitor(&__kmp_monitor);
6445	TCW_4(__kmp_init_monitor, `0`);
6446	}
6447	__kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6448	KA_TRACE(`10`, ("__kmp_internal_end: monitor reaped\n"));
6449	#endif
6450	} / else !__kmp_global.t_active /
6451	TCW_4(__kmp_init_gtid, FALSE);
6452	KMP_MB(); / Flush all pending memory write invalidates. /
6453
6454	__kmp_cleanup();
6455	#if OMPT_SUPPORT
6456	ompt_fini();
6457	#endif
6458	}
6459
6460	void __kmp_internal_end_library(int gtid_req) {
6461	/ if we have already cleaned up, don't try again, it wouldn't be pretty /
6462	/ this shouldn't be a race condition because __kmp_internal_end() is the*
6463	only place to clear __kmp_serial_init /*
6464	/ we'll check this later too, after we get the lock /
6465	// 2009-09-06: We do not set g_abort without setting g_done. This check looks
6466	// redundant, because the next check will work in any case.
6467	if (__kmp_global.g.g_abort) {
6468	KA_TRACE(`11`, ("__kmp_internal_end_library: abort, exiting\n"));
6469	/ TODO abort? /
6470	return;
6471	}
6472	if (TCR_4(__kmp_global.g.g_done) \|\| !__kmp_init_serial) {
6473	KA_TRACE(`10`, ("__kmp_internal_end_library: already finished\n"));
6474	return;
6475	}
6476
6477	// If hidden helper team has been initialized, we need to deinit it
6478	if (TCR_4(__kmp_init_hidden_helper) &&
6479	!TCR_4(__kmp_hidden_helper_team_done)) {
6480	TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6481	// First release the main thread to let it continue its work
6482	__kmp_hidden_helper_main_thread_release();
6483	// Wait until the hidden helper team has been destroyed
6484	__kmp_hidden_helper_threads_deinitz_wait();
6485	}
6486
6487	KMP_MB(); / Flush all pending memory write invalidates. /
6488	/ find out who we are and what we should do /
6489	{
6490	int gtid = (gtid_req >= `0`) ? gtid_req : __kmp_gtid_get_specific();
6491	KA_TRACE(
6492	`10`, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6493	if (gtid == KMP_GTID_SHUTDOWN) {
6494	KA_TRACE(`10`, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6495	"already shutdown\n"));
6496	return;
6497	} else if (gtid == KMP_GTID_MONITOR) {
6498	KA_TRACE(`10`, ("__kmp_internal_end_library: monitor thread, gtid not "
6499	"registered, or system shutdown\n"));
6500	return;
6501	} else if (gtid == KMP_GTID_DNE) {
6502	KA_TRACE(`10`, ("__kmp_internal_end_library: gtid not registered or system "
6503	"shutdown\n"));
6504	/ we don't know who we are, but we may still shutdown the library /
6505	} else if (KMP_UBER_GTID(gtid)) {
6506	/ unregister ourselves as an uber thread. gtid is no longer valid /
6507	if (__kmp_root[gtid]->r.r_active) {
6508	__kmp_global.g.g_abort = -`1`;
6509	TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6510	__kmp_unregister_library();
6511	KA_TRACE(`10`,
6512	("__kmp_internal_end_library: root still active, abort T#%d\n",
6513	gtid));
6514	return;
6515	} else {
6516	__kmp_itthash_clean(th: __kmp_threads[gtid]);
6517	KA_TRACE(
6518	`10`,
6519	("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6520	__kmp_unregister_root_current_thread(gtid);
6521	}
6522	} else {
6523	/ worker threads may call this function through the atexit handler, if they*
6524	* call exit() */
6525	/ For now, skip the usual subsequent processing and just dump the debug buffer.*
6526	TODO: do a thorough shutdown instead /*
6527	#ifdef DUMP_DEBUG_ON_EXIT
6528	if (__kmp_debug_buf)
6529	__kmp_dump_debug_buffer();
6530	#endif
6531	// added unregister library call here when we switch to shm linux
6532	// if we don't, it will leave lots of files in /dev/shm
6533	// cleanup shared memory file before exiting.
6534	__kmp_unregister_library();
6535	return;
6536	}
6537	}
6538	/ synchronize the termination process /
6539	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
6540
6541	/ have we already finished /
6542	if (__kmp_global.g.g_abort) {
6543	KA_TRACE(`10`, ("__kmp_internal_end_library: abort, exiting\n"));
6544	/ TODO abort? /
6545	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
6546	return;
6547	}
6548	if (TCR_4(__kmp_global.g.g_done) \|\| !__kmp_init_serial) {
6549	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
6550	return;
6551	}
6552
6553	/ We need this lock to enforce mutex between this reading of*
6554	__kmp_threads_capacity and the writing by __kmp_register_root.
6555	Alternatively, we can use a counter of roots that is atomically updated by
6556	__kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6557	__kmp_internal_end_. /
6558	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
6559
6560	/ now we can safely conduct the actual termination /
6561	__kmp_internal_end();
6562
6563	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
6564	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
6565
6566	KA_TRACE(`10`, ("__kmp_internal_end_library: exit\n"));
6567
6568	#ifdef DUMP_DEBUG_ON_EXIT
6569	if (__kmp_debug_buf)
6570	__kmp_dump_debug_buffer();
6571	#endif
6572
6573	#if KMP_OS_WINDOWS
6574	__kmp_close_console();
6575	#endif
6576
6577	__kmp_fini_allocator();
6578
6579	} // __kmp_internal_end_library
6580
6581	void __kmp_internal_end_thread(int gtid_req) {
6582	int i;
6583
6584	/ if we have already cleaned up, don't try again, it wouldn't be pretty /
6585	/ this shouldn't be a race condition because __kmp_internal_end() is the*
6586	* only place to clear __kmp_serial_init */
6587	/ we'll check this later too, after we get the lock /
6588	// 2009-09-06: We do not set g_abort without setting g_done. This check looks
6589	// redundant, because the next check will work in any case.
6590	if (__kmp_global.g.g_abort) {
6591	KA_TRACE(`11`, ("__kmp_internal_end_thread: abort, exiting\n"));
6592	/ TODO abort? /
6593	return;
6594	}
6595	if (TCR_4(__kmp_global.g.g_done) \|\| !__kmp_init_serial) {
6596	KA_TRACE(`10`, ("__kmp_internal_end_thread: already finished\n"));
6597	return;
6598	}
6599
6600	// If hidden helper team has been initialized, we need to deinit it
6601	if (TCR_4(__kmp_init_hidden_helper) &&
6602	!TCR_4(__kmp_hidden_helper_team_done)) {
6603	TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6604	// First release the main thread to let it continue its work
6605	__kmp_hidden_helper_main_thread_release();
6606	// Wait until the hidden helper team has been destroyed
6607	__kmp_hidden_helper_threads_deinitz_wait();
6608	}
6609
6610	KMP_MB(); / Flush all pending memory write invalidates. /
6611
6612	/ find out who we are and what we should do /
6613	{
6614	int gtid = (gtid_req >= `0`) ? gtid_req : __kmp_gtid_get_specific();
6615	KA_TRACE(`10`,
6616	("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6617	if (gtid == KMP_GTID_SHUTDOWN) {
6618	KA_TRACE(`10`, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6619	"already shutdown\n"));
6620	return;
6621	} else if (gtid == KMP_GTID_MONITOR) {
6622	KA_TRACE(`10`, ("__kmp_internal_end_thread: monitor thread, gtid not "
6623	"registered, or system shutdown\n"));
6624	return;
6625	} else if (gtid == KMP_GTID_DNE) {
6626	KA_TRACE(`10`, ("__kmp_internal_end_thread: gtid not registered or system "
6627	"shutdown\n"));
6628	return;
6629	/ we don't know who we are /
6630	} else if (KMP_UBER_GTID(gtid)) {
6631	/ unregister ourselves as an uber thread. gtid is no longer valid /
6632	if (__kmp_root[gtid]->r.r_active) {
6633	__kmp_global.g.g_abort = -`1`;
6634	TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6635	KA_TRACE(`10`,
6636	("__kmp_internal_end_thread: root still active, abort T#%d\n",
6637	gtid));
6638	return;
6639	} else {
6640	KA_TRACE(`10`, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6641	gtid));
6642	__kmp_unregister_root_current_thread(gtid);
6643	}
6644	} else {
6645	/ just a worker thread, let's leave /
6646	KA_TRACE(`10`, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6647
6648	if (gtid >= `0`) {
6649	__kmp_threads[gtid]->th.th_task_team = NULL;
6650	}
6651
6652	KA_TRACE(`10`,
6653	("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6654	gtid));
6655	return;
6656	}
6657	}
6658	#if KMP_DYNAMIC_LIB
6659	if (__kmp_pause_status != kmp_hard_paused)
6660	// AC: lets not shutdown the dynamic library at the exit of uber thread,
6661	// because we will better shutdown later in the library destructor.
6662	{
6663	KA_TRACE(`10`, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6664	return;
6665	}
6666	#endif
6667	/ synchronize the termination process /
6668	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
6669
6670	/ have we already finished /
6671	if (__kmp_global.g.g_abort) {
6672	KA_TRACE(`10`, ("__kmp_internal_end_thread: abort, exiting\n"));
6673	/ TODO abort? /
6674	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
6675	return;
6676	}
6677	if (TCR_4(__kmp_global.g.g_done) \|\| !__kmp_init_serial) {
6678	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
6679	return;
6680	}
6681
6682	/ We need this lock to enforce mutex between this reading of*
6683	__kmp_threads_capacity and the writing by __kmp_register_root.
6684	Alternatively, we can use a counter of roots that is atomically updated by
6685	__kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6686	__kmp_internal_end_. /
6687
6688	/ should we finish the run-time? are all siblings done? /
6689	__kmp_acquire_bootstrap_lock(lck: &__kmp_forkjoin_lock);
6690
6691	for (i = `0`; i < __kmp_threads_capacity; ++i) {
6692	if (KMP_UBER_GTID(gtid: i)) {
6693	KA_TRACE(
6694	`10`,
6695	("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6696	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
6697	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
6698	return;
6699	}
6700	}
6701
6702	/ now we can safely conduct the actual termination /
6703
6704	__kmp_internal_end();
6705
6706	__kmp_release_bootstrap_lock(lck: &__kmp_forkjoin_lock);
6707	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
6708
6709	KA_TRACE(`10`, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6710
6711	#ifdef DUMP_DEBUG_ON_EXIT
6712	if (__kmp_debug_buf)
6713	__kmp_dump_debug_buffer();
6714	#endif
6715	} // __kmp_internal_end_thread
6716
6717	// -----------------------------------------------------------------------------
6718	// Library registration stuff.
6719
6720	static long __kmp_registration_flag = `0`;
6721	// Random value used to indicate library initialization.
6722	static char *__kmp_registration_str = NULL;
6723	// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
6724
6725	static inline char *__kmp_reg_status_name() {
6726	/ On RHEL 3u5 if linked statically, getpid() returns different values in*
6727	each thread. If registration and unregistration go in different threads
6728	(omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6729	env var can not be found, because the name will contain different pid. /*
6730	// macOS complains about name being too long with additional getuid()*
6731	#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6732	return __kmp_str_format(format: "__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6733	(int)getuid());
6734	#else
6735	return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6736	#endif
6737	} // __kmp_reg_status_get
6738
6739	#if defined(KMP_USE_SHM)
6740	bool __kmp_shm_available = false;
6741	bool __kmp_tmp_available = false;
6742	// If /dev/shm is not accessible, we will create a temporary file under /tmp.
6743	char temp_reg_status_file_name = nullptr*;
6744	#endif
6745
6746	void __kmp_register_library_startup(void) {
6747
6748	char name = __kmp_reg_status_name(); // Name of the environment variable.*
6749	int done = `0`;
6750	union {
6751	double dtime;
6752	long ltime;
6753	} time;
6754	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
6755	__kmp_initialize_system_tick();
6756	#endif
6757	__kmp_read_system_time(delta: &time.dtime);
6758	__kmp_registration_flag = `0xCAFE0000L` \| (time.ltime & `0x0000FFFFL`);
6759	__kmp_registration_str =
6760	__kmp_str_format(format: "%p-%lx-%s", &__kmp_registration_flag,
6761	__kmp_registration_flag, KMP_LIBRARY_FILE);
6762
6763	KA_TRACE(`50`, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6764	__kmp_registration_str));
6765
6766	while (!done) {
6767
6768	char value = NULL; // Actual value of the environment variable.*
6769
6770	#if defined(KMP_USE_SHM)
6771	char shm_name = nullptr*;
6772	char data1 = nullptr*;
6773	__kmp_shm_available = __kmp_detect_shm();
6774	if (__kmp_shm_available) {
6775	int fd1 = -`1`;
6776	shm_name = __kmp_str_format(format: "/%s", name);
6777	int shm_preexist = `0`;
6778	fd1 = shm_open(name: shm_name, O_CREAT \| O_EXCL \| O_RDWR, mode: `0600`);
6779	if ((fd1 == -`1`) && (errno == EEXIST)) {
6780	// file didn't open because it already exists.
6781	// try opening existing file
6782	fd1 = shm_open(name: shm_name, O_RDWR, mode: `0600`);
6783	if (fd1 == -`1`) { // file didn't open
6784	KMP_WARNING(FunctionError, "Can't open SHM");
6785	__kmp_shm_available = false;
6786	} else { // able to open existing file
6787	shm_preexist = `1`;
6788	}
6789	}
6790	if (__kmp_shm_available && shm_preexist == `0`) { // SHM created, set size
6791	if (ftruncate(fd: fd1, SHM_SIZE) == -`1`) { // error occured setting size;
6792	KMP_WARNING(FunctionError, "Can't set size of SHM");
6793	__kmp_shm_available = false;
6794	}
6795	}
6796	if (__kmp_shm_available) { // SHM exists, now map it
6797	data1 = (char *)mmap(addr: `0`, SHM_SIZE, PROT_READ \| PROT_WRITE, MAP_SHARED,
6798	fd: fd1, offset: `0`);
6799	if (data1 == MAP_FAILED) { // failed to map shared memory
6800	KMP_WARNING(FunctionError, "Can't map SHM");
6801	__kmp_shm_available = false;
6802	}
6803	}
6804	if (__kmp_shm_available) { // SHM mapped
6805	if (shm_preexist == `0`) { // set data to SHM, set value
6806	KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6807	}
6808	// Read value from either what we just wrote or existing file.
6809	value = __kmp_str_format(format: "%s", data1); // read value from SHM
6810	munmap(addr: data1, SHM_SIZE);
6811	}
6812	if (fd1 != -`1`)
6813	close(fd: fd1);
6814	}
6815	if (!__kmp_shm_available)
6816	__kmp_tmp_available = __kmp_detect_tmp();
6817	if (!__kmp_shm_available && __kmp_tmp_available) {
6818	// SHM failed to work due to an error other than that the file already
6819	// exists. Try to create a temp file under /tmp.
6820	// If /tmp isn't accessible, fall back to using environment variable.
6821	// TODO: /tmp might not always be the temporary directory. For now we will
6822	// not consider TMPDIR.
6823	int fd1 = -`1`;
6824	temp_reg_status_file_name = __kmp_str_format(format: "/tmp/%s", name);
6825	int tmp_preexist = `0`;
6826	fd1 = open(file: temp_reg_status_file_name, O_CREAT \| O_EXCL \| O_RDWR, `0600`);
6827	if ((fd1 == -`1`) && (errno == EEXIST)) {
6828	// file didn't open because it already exists.
6829	// try opening existing file
6830	fd1 = open(file: temp_reg_status_file_name, O_RDWR, `0600`);
6831	if (fd1 == -`1`) { // file didn't open if (fd1 == -1) {
6832	KMP_WARNING(FunctionError, "Can't open TEMP");
6833	__kmp_tmp_available = false;
6834	} else {
6835	tmp_preexist = `1`;
6836	}
6837	}
6838	if (__kmp_tmp_available && tmp_preexist == `0`) {
6839	// we created /tmp file now set size
6840	if (ftruncate(fd: fd1, SHM_SIZE) == -`1`) { // error occured setting size;
6841	KMP_WARNING(FunctionError, "Can't set size of /tmp file");
6842	__kmp_tmp_available = false;
6843	}
6844	}
6845	if (__kmp_tmp_available) {
6846	data1 = (char *)mmap(addr: `0`, SHM_SIZE, PROT_READ \| PROT_WRITE, MAP_SHARED,
6847	fd: fd1, offset: `0`);
6848	if (data1 == MAP_FAILED) { // failed to map /tmp
6849	KMP_WARNING(FunctionError, "Can't map /tmp");
6850	__kmp_tmp_available = false;
6851	}
6852	}
6853	if (__kmp_tmp_available) {
6854	if (tmp_preexist == `0`) { // set data to TMP, set value
6855	KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6856	}
6857	// Read value from either what we just wrote or existing file.
6858	value = __kmp_str_format(format: "%s", data1); // read value from SHM
6859	munmap(addr: data1, SHM_SIZE);
6860	}
6861	if (fd1 != -`1`)
6862	close(fd: fd1);
6863	}
6864	if (!__kmp_shm_available && !__kmp_tmp_available) {
6865	// no /dev/shm and no /tmp -- fall back to environment variable
6866	// Set environment variable, but do not overwrite if it exists.
6867	__kmp_env_set(name, value: __kmp_registration_str, overwrite: `0`);
6868	// read value to see if it got set
6869	value = __kmp_env_get(name);
6870	}
6871	#else // Windows and unix with static library
6872	// Set environment variable, but do not overwrite if it exists.
6873	__kmp_env_set(name, __kmp_registration_str, `0`);
6874	// read value to see if it got set
6875	value = __kmp_env_get(name);
6876	#endif
6877
6878	if (value != NULL && strcmp(s1: value, s2: __kmp_registration_str) == `0`) {
6879	done = `1`; // Ok, environment variable set successfully, exit the loop.
6880	} else {
6881	// Oops. Write failed. Another copy of OpenMP RTL is in memory.
6882	// Check whether it alive or dead.
6883	int neighbor = `0`; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6884	char *tail = value;
6885	char *flag_addr_str = NULL;
6886	char *flag_val_str = NULL;
6887	char const *file_name = NULL;
6888	__kmp_str_split(str: tail, delim: `'-'`, head: &flag_addr_str, tail: &tail);
6889	__kmp_str_split(str: tail, delim: `'-'`, head: &flag_val_str, tail: &tail);
6890	file_name = tail;
6891	if (tail != NULL) {
6892	unsigned long *flag_addr = `0`;
6893	unsigned long flag_val = `0`;
6894	KMP_SSCANF(s: flag_addr_str, format: "%p", RCAST(void **, &flag_addr));
6895	KMP_SSCANF(s: flag_val_str, format: "%lx", &flag_val);
6896	if (flag_addr != `0` && flag_val != `0` && strcmp(s1: file_name, s2: "") != `0`) {
6897	// First, check whether environment-encoded address is mapped into
6898	// addr space.
6899	// If so, dereference it to see if it still has the right value.
6900	if (__kmp_is_address_mapped(addr: flag_addr) && *flag_addr == flag_val) {
6901	neighbor = `1`;
6902	} else {
6903	// If not, then we know the other copy of the library is no longer
6904	// running.
6905	neighbor = `2`;
6906	}
6907	}
6908	}
6909	switch (neighbor) {
6910	case `0`: // Cannot parse environment variable -- neighbor status unknown.
6911	// Assume it is the incompatible format of future version of the
6912	// library. Assume the other library is alive.
6913	// WARN( ... ); // TODO: Issue a warning.
6914	file_name = "unknown library";
6915	KMP_FALLTHROUGH();
6916	// Attention! Falling to the next case. That's intentional.
6917	case `1`: { // Neighbor is alive.
6918	// Check it is allowed.
6919	char *duplicate_ok = __kmp_env_get(name: "KMP_DUPLICATE_LIB_OK");
6920	if (!__kmp_str_match_true(data: duplicate_ok)) {
6921	// That's not allowed. Issue fatal error.
6922	__kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6923	KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6924	}
6925	KMP_INTERNAL_FREE(duplicate_ok);
6926	__kmp_duplicate_library_ok = `1`;
6927	done = `1`; // Exit the loop.
6928	} break;
6929	case `2`: { // Neighbor is dead.
6930
6931	#if defined(KMP_USE_SHM)
6932	if (__kmp_shm_available) { // close shared memory.
6933	shm_unlink(name: shm_name); // this removes file in /dev/shm
6934	} else if (__kmp_tmp_available) {
6935	unlink(name: temp_reg_status_file_name); // this removes the temp file
6936	} else {
6937	// Clear the variable and try to register library again.
6938	__kmp_env_unset(name);
6939	}
6940	#else
6941	// Clear the variable and try to register library again.
6942	__kmp_env_unset(name);
6943	#endif
6944	} break;
6945	default: {
6946	KMP_DEBUG_ASSERT(`0`);
6947	} break;
6948	}
6949	}
6950	KMP_INTERNAL_FREE((void *)value);
6951	#if defined(KMP_USE_SHM)
6952	if (shm_name)
6953	KMP_INTERNAL_FREE((void *)shm_name);
6954	#endif
6955	} // while
6956	KMP_INTERNAL_FREE((void *)name);
6957
6958	} // func __kmp_register_library_startup
6959
6960	void __kmp_unregister_library(void) {
6961
6962	char *name = __kmp_reg_status_name();
6963	char *value = NULL;
6964
6965	#if defined(KMP_USE_SHM)
6966	char shm_name = nullptr*;
6967	int fd1;
6968	if (__kmp_shm_available) {
6969	shm_name = __kmp_str_format(format: "/%s", name);
6970	fd1 = shm_open(name: shm_name, O_RDONLY, mode: `0600`);
6971	if (fd1 != -`1`) { // File opened successfully
6972	char data1 = (char* *)mmap(addr: `0`, SHM_SIZE, PROT_READ, MAP_SHARED, fd: fd1, offset: `0`);
6973	if (data1 != MAP_FAILED) {
6974	value = __kmp_str_format(format: "%s", data1); // read value from SHM
6975	munmap(addr: data1, SHM_SIZE);
6976	}
6977	close(fd: fd1);
6978	}
6979	} else if (__kmp_tmp_available) { // try /tmp
6980	fd1 = open(file: temp_reg_status_file_name, O_RDONLY);
6981	if (fd1 != -`1`) { // File opened successfully
6982	char data1 = (char* *)mmap(addr: `0`, SHM_SIZE, PROT_READ, MAP_SHARED, fd: fd1, offset: `0`);
6983	if (data1 != MAP_FAILED) {
6984	value = __kmp_str_format(format: "%s", data1); // read value from /tmp
6985	munmap(addr: data1, SHM_SIZE);
6986	}
6987	close(fd: fd1);
6988	}
6989	} else { // fall back to envirable
6990	value = __kmp_env_get(name);
6991	}
6992	#else
6993	value = __kmp_env_get(name);
6994	#endif
6995
6996	KMP_DEBUG_ASSERT(__kmp_registration_flag != `0`);
6997	KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6998	if (value != NULL && strcmp(s1: value, s2: __kmp_registration_str) == `0`) {
6999	// Ok, this is our variable. Delete it.
7000	#if defined(KMP_USE_SHM)
7001	if (__kmp_shm_available) {
7002	shm_unlink(name: shm_name); // this removes file in /dev/shm
7003	} else if (__kmp_tmp_available) {
7004	unlink(name: temp_reg_status_file_name); // this removes the temp file
7005	} else {
7006	__kmp_env_unset(name);
7007	}
7008	#else
7009	__kmp_env_unset(name);
7010	#endif
7011	}
7012
7013	#if defined(KMP_USE_SHM)
7014	if (shm_name)
7015	KMP_INTERNAL_FREE(shm_name);
7016	if (temp_reg_status_file_name)
7017	KMP_INTERNAL_FREE(temp_reg_status_file_name);
7018	#endif
7019
7020	KMP_INTERNAL_FREE(__kmp_registration_str);
7021	KMP_INTERNAL_FREE(value);
7022	KMP_INTERNAL_FREE(name);
7023
7024	__kmp_registration_flag = `0`;
7025	__kmp_registration_str = NULL;
7026
7027	} // __kmp_unregister_library
7028
7029	// End of Library registration stuff.
7030	// -----------------------------------------------------------------------------
7031
7032	#if KMP_MIC_SUPPORTED
7033
7034	static void __kmp_check_mic_type() {
7035	kmp_cpuid_t cpuid_state = {.eax: `0`};
7036	kmp_cpuid_t *cs_p = &cpuid_state;
7037	__kmp_x86_cpuid(leaf: `1`, subleaf: `0`, p: cs_p);
7038	// We don't support mic1 at the moment
7039	if ((cs_p->eax & `0xff0`) == `0xB10`) {
7040	__kmp_mic_type = mic2;
7041	} else if ((cs_p->eax & `0xf0ff0`) == `0x50670`) {
7042	__kmp_mic_type = mic3;
7043	} else {
7044	__kmp_mic_type = non_mic;
7045	}
7046	}
7047
7048	#endif /* KMP_MIC_SUPPORTED */
7049
7050	#if KMP_HAVE_UMWAIT
7051	static void __kmp_user_level_mwait_init() {
7052	struct kmp_cpuid buf;
7053	__kmp_x86_cpuid(leaf: `7`, subleaf: `0`, p: &buf);
7054	__kmp_waitpkg_enabled = ((buf.ecx >> `5`) & `1`);
7055	__kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
7056	__kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > `0`);
7057	KF_TRACE(`30`, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
7058	__kmp_umwait_enabled));
7059	}
7060	#elif KMP_HAVE_MWAIT
7061	#ifndef AT_INTELPHIUSERMWAIT
7062	// Spurious, non-existent value that should always fail to return anything.
7063	// Will be replaced with the correct value when we know that.
7064	#define AT_INTELPHIUSERMWAIT 10000
7065	#endif
7066	// getauxval() function is available in RHEL7 and SLES12. If a system with an
7067	// earlier OS is used to build the RTL, we'll use the following internal
7068	// function when the entry is not found.
7069	unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7070	unsigned long getauxval(unsigned long) { return `0`; }
7071
7072	static void __kmp_user_level_mwait_init() {
7073	// When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
7074	// use them to find if the user-level mwait is enabled. Otherwise, forcibly
7075	// set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
7076	// KMP_USER_LEVEL_MWAIT was set to TRUE.
7077	if (__kmp_mic_type == mic3) {
7078	unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7079	if ((res & `0x1`) \|\| __kmp_user_level_mwait) {
7080	__kmp_mwait_enabled = TRUE;
7081	if (__kmp_user_level_mwait) {
7082	KMP_INFORM(EnvMwaitWarn);
7083	}
7084	} else {
7085	__kmp_mwait_enabled = FALSE;
7086	}
7087	}
7088	KF_TRACE(`30`, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7089	"__kmp_mwait_enabled = %d\n",
7090	__kmp_mic_type, __kmp_mwait_enabled));
7091	}
7092	#endif /* KMP_HAVE_UMWAIT */
7093
7094	static void __kmp_do_serial_initialize(void) {
7095	int i, gtid;
7096	size_t size;
7097
7098	KA_TRACE(`10`, ("__kmp_do_serial_initialize: enter\n"));
7099
7100	KMP_DEBUG_ASSERT(sizeof(kmp_int32) == `4`);
7101	KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == `4`);
7102	KMP_DEBUG_ASSERT(sizeof(kmp_int64) == `8`);
7103	KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == `8`);
7104	KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7105
7106	#if OMPT_SUPPORT
7107	ompt_pre_init();
7108	#endif
7109	#if OMPD_SUPPORT
7110	__kmp_env_dump();
7111	ompd_init();
7112	#endif
7113
7114	__kmp_validate_locks();
7115
7116	#if ENABLE_LIBOMPTARGET
7117	/ Initialize functions from libomptarget /
7118	__kmp_init_omptarget();
7119	#endif
7120
7121	/ Initialize internal memory allocator /
7122	__kmp_init_allocator();
7123
7124	/ Register the library startup via an environment variable or via mapped*
7125	shared memory file and check to see whether another copy of the library is
7126	already registered. Since forked child process is often terminated, we
7127	postpone the registration till middle initialization in the child /*
7128	if (__kmp_need_register_serial)
7129	__kmp_register_library_startup();
7130
7131	/ TODO reinitialization of library /
7132	if (TCR_4(__kmp_global.g.g_done)) {
7133	KA_TRACE(`10`, ("__kmp_do_serial_initialize: reinitialization of library\n"));
7134	}
7135
7136	__kmp_global.g.g_abort = `0`;
7137	TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7138
7139	/ initialize the locks /
7140	#if KMP_USE_ADAPTIVE_LOCKS
7141	#if KMP_DEBUG_ADAPTIVE_LOCKS
7142	__kmp_init_speculative_stats();
7143	#endif
7144	#endif
7145	#if KMP_STATS_ENABLED
7146	__kmp_stats_init();
7147	#endif
7148	__kmp_init_lock(lck: &__kmp_global_lock);
7149	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock);
7150	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_1i);
7151	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_2i);
7152	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_4i);
7153	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_4r);
7154	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_8i);
7155	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_8r);
7156	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_8c);
7157	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_10r);
7158	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_16r);
7159	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_16c);
7160	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_20c);
7161	__kmp_init_atomic_lock(lck: &__kmp_atomic_lock_32c);
7162	__kmp_init_bootstrap_lock(lck: &__kmp_forkjoin_lock);
7163	__kmp_init_bootstrap_lock(lck: &__kmp_exit_lock);
7164	#if KMP_USE_MONITOR
7165	__kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7166	#endif
7167	__kmp_init_bootstrap_lock(lck: &__kmp_tp_cached_lock);
7168
7169	/ conduct initialization and initial setup of configuration /
7170
7171	__kmp_runtime_initialize();
7172
7173	#if KMP_MIC_SUPPORTED
7174	__kmp_check_mic_type();
7175	#endif
7176
7177	// Some global variable initialization moved here from kmp_env_initialize()
7178	#ifdef KMP_DEBUG
7179	kmp_diag = `0`;
7180	#endif
7181	__kmp_abort_delay = `0`;
7182
7183	// From __kmp_init_dflt_team_nth()
7184	/ assume the entire machine will be used /
7185	__kmp_dflt_team_nth_ub = __kmp_xproc;
7186	if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7187	__kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7188	}
7189	if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7190	__kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7191	}
7192	__kmp_max_nth = __kmp_sys_max_nth;
7193	__kmp_cg_max_nth = __kmp_sys_max_nth;
7194	__kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
7195	if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7196	__kmp_teams_max_nth = __kmp_sys_max_nth;
7197	}
7198
7199	// Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
7200	// part
7201	__kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7202	#if KMP_USE_MONITOR
7203	__kmp_monitor_wakeups =
7204	KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7205	__kmp_bt_intervals =
7206	KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7207	#endif
7208	// From "KMP_LIBRARY" part of __kmp_env_initialize()
7209	__kmp_library = library_throughput;
7210	// From KMP_SCHEDULE initialization
7211	__kmp_static = kmp_sch_static_balanced;
7212	// AC: do not use analytical here, because it is non-monotonous
7213	//__kmp_guided = kmp_sch_guided_iterative_chunked;
7214	//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
7215	// need to repeat assignment
7216	// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
7217	// bit control and barrier method control parts
7218	#if KMP_FAST_REDUCTION_BARRIER
7219	#define kmp_reduction_barrier_gather_bb ((int)1)
7220	#define kmp_reduction_barrier_release_bb ((int)1)
7221	#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7222	#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7223	#endif // KMP_FAST_REDUCTION_BARRIER
7224	for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7225	__kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7226	__kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7227	__kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7228	__kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7229	#if KMP_FAST_REDUCTION_BARRIER
7230	if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
7231	// lin_64 ): hyper,1
7232	__kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7233	__kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7234	__kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7235	__kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7236	}
7237	#endif // KMP_FAST_REDUCTION_BARRIER
7238	}
7239	#if KMP_FAST_REDUCTION_BARRIER
7240	#undef kmp_reduction_barrier_release_pat
7241	#undef kmp_reduction_barrier_gather_pat
7242	#undef kmp_reduction_barrier_release_bb
7243	#undef kmp_reduction_barrier_gather_bb
7244	#endif // KMP_FAST_REDUCTION_BARRIER
7245	#if KMP_MIC_SUPPORTED
7246	if (__kmp_mic_type == mic2) { // KNC
7247	// AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7248	__kmp_barrier_gather_branch_bits[bs_plain_barrier] = `3`; // plain gather
7249	__kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7250	`1`; // forkjoin release
7251	__kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7252	__kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7253	}
7254	#if KMP_FAST_REDUCTION_BARRIER
7255	if (__kmp_mic_type == mic2) { // KNC
7256	__kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7257	__kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7258	}
7259	#endif // KMP_FAST_REDUCTION_BARRIER
7260	#endif // KMP_MIC_SUPPORTED
7261
7262	// From KMP_CHECKS initialization
7263	#ifdef KMP_DEBUG
7264	__kmp_env_checks = TRUE; / development versions have the extra checks /
7265	#else
7266	__kmp_env_checks = FALSE; / port versions do not have the extra checks /
7267	#endif
7268
7269	// From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7270	__kmp_foreign_tp = TRUE;
7271
7272	__kmp_global.g.g_dynamic = FALSE;
7273	__kmp_global.g.g_dynamic_mode = dynamic_default;
7274
7275	__kmp_init_nesting_mode();
7276
7277	__kmp_env_initialize(NULL);
7278
7279	#if KMP_HAVE_MWAIT \|\| KMP_HAVE_UMWAIT
7280	__kmp_user_level_mwait_init();
7281	#endif
7282	// Print all messages in message catalog for testing purposes.
7283	#ifdef KMP_DEBUG
7284	char const *val = __kmp_env_get(name: "KMP_DUMP_CATALOG");
7285	if (__kmp_str_match_true(data: val)) {
7286	kmp_str_buf_t buffer;
7287	__kmp_str_buf_init(&buffer);
7288	__kmp_i18n_dump_catalog(buffer: &buffer);
7289	__kmp_printf(format: "%s", buffer.str);
7290	__kmp_str_buf_free(buffer: &buffer);
7291	}
7292	__kmp_env_free(value: &val);
7293	#endif
7294
7295	__kmp_threads_capacity =
7296	__kmp_initial_threads_capacity(req_nproc: __kmp_dflt_team_nth_ub);
7297	// Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7298	__kmp_tp_capacity = __kmp_default_tp_capacity(
7299	__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7300
7301	// If the library is shut down properly, both pools must be NULL. Just in
7302	// case, set them to NULL -- some memory may leak, but subsequent code will
7303	// work even if pools are not freed.
7304	KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7305	KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7306	KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7307	__kmp_thread_pool = NULL;
7308	__kmp_thread_pool_insert_pt = NULL;
7309	__kmp_team_pool = NULL;
7310
7311	/ Allocate all of the variable sized records /
7312	/ NOTE: __kmp_threads_capacity entries are allocated, but the arrays are*
7313	* expandable */
7314	/ Since allocation is cache-aligned, just add extra padding at the end /
7315	size =
7316	(sizeof(kmp_info_t ) + sizeof(kmp_root_t )) * __kmp_threads_capacity +
7317	CACHE_LINE;
7318	__kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7319	__kmp_root = (kmp_root_t *)((char* *)__kmp_threads +
7320	sizeof(kmp_info_t ) __kmp_threads_capacity);
7321
7322	/ init thread counts /
7323	KMP_DEBUG_ASSERT(__kmp_all_nth ==
7324	`0`); // Asserts fail if the library is reinitializing and
7325	KMP_DEBUG_ASSERT(__kmp_nth == `0`); // something was wrong in termination.
7326	__kmp_all_nth = `0`;
7327	__kmp_nth = `0`;
7328
7329	/ setup the uber master thread and hierarchy /
7330	gtid = __kmp_register_root(TRUE);
7331	KA_TRACE(`10`, ("__kmp_do_serial_initialize T#%d\n", gtid));
7332	KMP_ASSERT(KMP_UBER_GTID(gtid));
7333	KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7334
7335	KMP_MB(); / Flush all pending memory write invalidates. /
7336
7337	__kmp_common_initialize();
7338
7339	#if KMP_OS_UNIX
7340	/ invoke the child fork handler /
7341	__kmp_register_atfork();
7342	#endif
7343
7344	#if !KMP_DYNAMIC_LIB \|\| \
7345	((KMP_COMPILER_ICC \|\| KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7346	{
7347	/ Invoke the exit handler when the program finishes, only for static*
7348	library and macOS dynamic. For other dynamic libraries, we already*
7349	have _fini and DllMain. /*
7350	int rc = atexit(__kmp_internal_end_atexit);
7351	if (rc != `0`) {
7352	__kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
7353	__kmp_msg_null);
7354	}
7355	}
7356	#endif
7357
7358	#if KMP_HANDLE_SIGNALS
7359	#if KMP_OS_UNIX
7360	/ NOTE: make sure that this is called before the user installs their own*
7361	signal handlers so that the user handlers are called first. this way they
7362	can return false, not call our handler, avoid terminating the library, and
7363	continue execution where they left off. /*
7364	__kmp_install_signals(FALSE);
7365	#endif /* KMP_OS_UNIX */
7366	#if KMP_OS_WINDOWS
7367	__kmp_install_signals(TRUE);
7368	#endif /* KMP_OS_WINDOWS */
7369	#endif
7370
7371	/ we have finished the serial initialization /
7372	__kmp_init_counter++;
7373
7374	__kmp_init_serial = TRUE;
7375
7376	if (__kmp_version) {
7377	__kmp_print_version_1();
7378	}
7379
7380	if (__kmp_settings) {
7381	__kmp_env_print();
7382	}
7383
7384	if (__kmp_display_env \|\| __kmp_display_env_verbose) {
7385	__kmp_env_print_2();
7386	}
7387
7388	#if OMPT_SUPPORT
7389	ompt_post_init();
7390	#endif
7391
7392	KMP_MB();
7393
7394	KA_TRACE(`10`, ("__kmp_do_serial_initialize: exit\n"));
7395	}
7396
7397	void __kmp_serial_initialize(void) {
7398	if (__kmp_init_serial) {
7399	return;
7400	}
7401	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
7402	if (__kmp_init_serial) {
7403	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7404	return;
7405	}
7406	__kmp_do_serial_initialize();
7407	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7408	}
7409
7410	static void __kmp_do_middle_initialize(void) {
7411	int i, j;
7412	int prev_dflt_team_nth;
7413
7414	if (!__kmp_init_serial) {
7415	__kmp_do_serial_initialize();
7416	}
7417
7418	KA_TRACE(`10`, ("__kmp_middle_initialize: enter\n"));
7419
7420	if (UNLIKELY(!__kmp_need_register_serial)) {
7421	// We are in a forked child process. The registration was skipped during
7422	// serial initialization in __kmp_atfork_child handler. Do it here.
7423	__kmp_register_library_startup();
7424	}
7425
7426	// Save the previous value for the __kmp_dflt_team_nth so that
7427	// we can avoid some reinitialization if it hasn't changed.
7428	prev_dflt_team_nth = __kmp_dflt_team_nth;
7429
7430	#if KMP_AFFINITY_SUPPORTED
7431	// __kmp_affinity_initialize() will try to set __kmp_ncores to the
7432	// number of cores on the machine.
7433	__kmp_affinity_initialize(affinity&: __kmp_affinity);
7434
7435	#endif /* KMP_AFFINITY_SUPPORTED */
7436
7437	KMP_ASSERT(__kmp_xproc > `0`);
7438	if (__kmp_avail_proc == `0`) {
7439	__kmp_avail_proc = __kmp_xproc;
7440	}
7441
7442	// If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
7443	// correct them now
7444	j = `0`;
7445	while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7446	__kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7447	__kmp_avail_proc;
7448	j++;
7449	}
7450
7451	if (__kmp_dflt_team_nth == `0`) {
7452	#ifdef KMP_DFLT_NTH_CORES
7453	// Default #threads = #cores
7454	__kmp_dflt_team_nth = __kmp_ncores;
7455	KA_TRACE(`20`, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7456	"__kmp_ncores (%d)\n",
7457	__kmp_dflt_team_nth));
7458	#else
7459	// Default #threads = #available OS procs
7460	__kmp_dflt_team_nth = __kmp_avail_proc;
7461	KA_TRACE(`20`, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7462	"__kmp_avail_proc(%d)\n",
7463	__kmp_dflt_team_nth));
7464	#endif /* KMP_DFLT_NTH_CORES */
7465	}
7466
7467	if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7468	__kmp_dflt_team_nth = KMP_MIN_NTH;
7469	}
7470	if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7471	__kmp_dflt_team_nth = __kmp_sys_max_nth;
7472	}
7473
7474	if (__kmp_nesting_mode > `0`)
7475	__kmp_set_nesting_mode_threads();
7476
7477	// There's no harm in continuing if the following check fails,
7478	// but it indicates an error in the previous logic.
7479	KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7480
7481	if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7482	// Run through the __kmp_threads array and set the num threads icv for each
7483	// root thread that is currently registered with the RTL (which has not
7484	// already explicitly set its nthreads-var with a call to
7485	// omp_set_num_threads()).
7486	for (i = `0`; i < __kmp_threads_capacity; i++) {
7487	kmp_info_t *thread = __kmp_threads[i];
7488	if (thread == NULL)
7489	continue;
7490	if (thread->th.th_current_task->td_icvs.nproc != `0`)
7491	continue;
7492
7493	set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7494	}
7495	}
7496	KA_TRACE(
7497	`20`,
7498	("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7499	__kmp_dflt_team_nth));
7500
7501	#ifdef KMP_ADJUST_BLOCKTIME
7502	/ Adjust blocktime to zero if necessary now that __kmp_avail_proc is set /
7503	if (!__kmp_env_blocktime && (__kmp_avail_proc > `0`)) {
7504	KMP_DEBUG_ASSERT(__kmp_avail_proc > `0`);
7505	if (__kmp_nth > __kmp_avail_proc) {
7506	__kmp_zero_bt = TRUE;
7507	}
7508	}
7509	#endif /* KMP_ADJUST_BLOCKTIME */
7510
7511	/ we have finished middle initialization /
7512	TCW_SYNC_4(__kmp_init_middle, TRUE);
7513
7514	KA_TRACE(`10`, ("__kmp_do_middle_initialize: exit\n"));
7515	}
7516
7517	void __kmp_middle_initialize(void) {
7518	if (__kmp_init_middle) {
7519	return;
7520	}
7521	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
7522	if (__kmp_init_middle) {
7523	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7524	return;
7525	}
7526	__kmp_do_middle_initialize();
7527	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7528	}
7529
7530	void __kmp_parallel_initialize(void) {
7531	int gtid = __kmp_entry_gtid(); // this might be a new root
7532
7533	/ synchronize parallel initialization (for sibling) /
7534	if (TCR_4(__kmp_init_parallel))
7535	return;
7536	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
7537	if (TCR_4(__kmp_init_parallel)) {
7538	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7539	return;
7540	}
7541
7542	/ TODO reinitialization after we have already shut down /
7543	if (TCR_4(__kmp_global.g.g_done)) {
7544	KA_TRACE(
7545	`10`,
7546	("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7547	__kmp_infinite_loop();
7548	}
7549
7550	/ jc: The lock __kmp_initz_lock is already held, so calling*
7551	__kmp_serial_initialize would cause a deadlock. So we call
7552	__kmp_do_serial_initialize directly. /*
7553	if (!__kmp_init_middle) {
7554	__kmp_do_middle_initialize();
7555	}
7556	__kmp_assign_root_init_mask();
7557	__kmp_resume_if_hard_paused();
7558
7559	/ begin initialization /
7560	KA_TRACE(`10`, ("__kmp_parallel_initialize: enter\n"));
7561	KMP_ASSERT(KMP_UBER_GTID(gtid));
7562
7563	#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
7564	// Save the FP control regs.
7565	// Worker threads will set theirs to these values at thread startup.
7566	__kmp_store_x87_fpu_control_word(p: &__kmp_init_x87_fpu_control_word);
7567	__kmp_store_mxcsr(p: &__kmp_init_mxcsr);
7568	__kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7569	#endif /* KMP_ARCH_X86 \|\| KMP_ARCH_X86_64 */
7570
7571	#if KMP_OS_UNIX
7572	#if KMP_HANDLE_SIGNALS
7573	/ must be after __kmp_serial_initialize /
7574	__kmp_install_signals(TRUE);
7575	#endif
7576	#endif
7577
7578	__kmp_suspend_initialize();
7579
7580	#if defined(USE_LOAD_BALANCE)
7581	if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7582	__kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7583	}
7584	#else
7585	if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7586	__kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7587	}
7588	#endif
7589
7590	if (__kmp_version) {
7591	__kmp_print_version_2();
7592	}
7593
7594	/ we have finished parallel initialization /
7595	TCW_SYNC_4(__kmp_init_parallel, TRUE);
7596
7597	KMP_MB();
7598	KA_TRACE(`10`, ("__kmp_parallel_initialize: exit\n"));
7599
7600	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7601	}
7602
7603	void __kmp_hidden_helper_initialize() {
7604	if (TCR_4(__kmp_init_hidden_helper))
7605	return;
7606
7607	// __kmp_parallel_initialize is required before we initialize hidden helper
7608	if (!TCR_4(__kmp_init_parallel))
7609	__kmp_parallel_initialize();
7610
7611	// Double check. Note that this double check should not be placed before
7612	// __kmp_parallel_initialize as it will cause dead lock.
7613	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
7614	if (TCR_4(__kmp_init_hidden_helper)) {
7615	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7616	return;
7617	}
7618
7619	#if KMP_AFFINITY_SUPPORTED
7620	// Initialize hidden helper affinity settings.
7621	// The above __kmp_parallel_initialize() will initialize
7622	// regular affinity (and topology) if not already done.
7623	if (!__kmp_hh_affinity.flags.initialized)
7624	__kmp_affinity_initialize(affinity&: __kmp_hh_affinity);
7625	#endif
7626
7627	// Set the count of hidden helper tasks to be executed to zero
7628	KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, `0`);
7629
7630	// Set the global variable indicating that we're initializing hidden helper
7631	// team/threads
7632	TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7633
7634	// Platform independent initialization
7635	__kmp_do_initialize_hidden_helper_threads();
7636
7637	// Wait here for the finish of initialization of hidden helper teams
7638	__kmp_hidden_helper_threads_initz_wait();
7639
7640	// We have finished hidden helper initialization
7641	TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7642
7643	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
7644	}
7645
7646	/ ------------------------------------------------------------------------ /
7647
7648	void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7649	kmp_team_t *team) {
7650	kmp_disp_t *dispatch;
7651
7652	KMP_MB();
7653
7654	/ none of the threads have encountered any constructs, yet. /
7655	this_thr->th.th_local.this_construct = `0`;
7656	#if KMP_CACHE_MANAGE
7657	KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7658	#endif /* KMP_CACHE_MANAGE */
7659	dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7660	KMP_DEBUG_ASSERT(dispatch);
7661	KMP_DEBUG_ASSERT(team->t.t_dispatch);
7662	// KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7663	// this_thr->th.th_info.ds.ds_tid ] );
7664
7665	dispatch->th_disp_index = `0`; / reset the dispatch buffer counter /
7666	dispatch->th_doacross_buf_idx = `0`; // reset doacross dispatch buffer counter
7667	if (__kmp_env_consistency_check)
7668	__kmp_push_parallel(gtid, ident: team->t.t_ident);
7669
7670	KMP_MB(); / Flush all pending memory write invalidates. /
7671	}
7672
7673	void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7674	kmp_team_t *team) {
7675	if (__kmp_env_consistency_check)
7676	__kmp_pop_parallel(gtid, ident: team->t.t_ident);
7677
7678	__kmp_finish_implicit_task(this_thr);
7679	}
7680
7681	int __kmp_invoke_task_func(int gtid) {
7682	int rc;
7683	int tid = __kmp_tid_from_gtid(gtid);
7684	kmp_info_t *this_thr = __kmp_threads[gtid];
7685	kmp_team_t *team = this_thr->th.th_team;
7686
7687	__kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7688	#if USE_ITT_BUILD
7689	if (__itt_stack_caller_create_ptr) {
7690	// inform ittnotify about entering user's code
7691	if (team->t.t_stack_id != NULL) {
7692	__kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7693	} else {
7694	KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7695	__kmp_itt_stack_callee_enter(
7696	(__itt_caller)team->t.t_parent->t.t_stack_id);
7697	}
7698	}
7699	#endif /* USE_ITT_BUILD */
7700	#if INCLUDE_SSC_MARKS
7701	SSC_MARK_INVOKING();
7702	#endif
7703
7704	#if OMPT_SUPPORT
7705	void *dummy;
7706	void **exit_frame_p;
7707	ompt_data_t *my_task_data;
7708	ompt_data_t *my_parallel_data;
7709	int ompt_team_size;
7710
7711	if (ompt_enabled.enabled) {
7712	exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7713	.ompt_task_info.frame.exit_frame.ptr);
7714	} else {
7715	exit_frame_p = &dummy;
7716	}
7717
7718	my_task_data =
7719	&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7720	my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7721	if (ompt_enabled.ompt_callback_implicit_task) {
7722	ompt_team_size = team->t.t_nproc;
7723	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7724	ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7725	__kmp_tid_from_gtid(gtid), ompt_task_implicit);
7726	OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7727	}
7728	#endif
7729
7730	#if KMP_STATS_ENABLED
7731	stats_state_e previous_state = KMP_GET_THREAD_STATE();
7732	if (previous_state == stats_state_e::TEAMS_REGION) {
7733	KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7734	} else {
7735	KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7736	}
7737	KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7738	#endif
7739
7740	rc = __kmp_invoke_microtask(pkfn: (microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7741	npr: tid, argc: (int)team->t.t_argc, argv: (void **)team->t.t_argv
7742	#if OMPT_SUPPORT
7743	,
7744	exit_frame_ptr: exit_frame_p
7745	#endif
7746	);
7747	#if OMPT_SUPPORT
7748	*exit_frame_p = NULL;
7749	this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
7750	#endif
7751
7752	#if KMP_STATS_ENABLED
7753	if (previous_state == stats_state_e::TEAMS_REGION) {
7754	KMP_SET_THREAD_STATE(previous_state);
7755	}
7756	KMP_POP_PARTITIONED_TIMER();
7757	#endif
7758
7759	#if USE_ITT_BUILD
7760	if (__itt_stack_caller_create_ptr) {
7761	// inform ittnotify about leaving user's code
7762	if (team->t.t_stack_id != NULL) {
7763	__kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7764	} else {
7765	KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7766	__kmp_itt_stack_callee_leave(
7767	(__itt_caller)team->t.t_parent->t.t_stack_id);
7768	}
7769	}
7770	#endif /* USE_ITT_BUILD */
7771	__kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7772
7773	return rc;
7774	}
7775
7776	void __kmp_teams_master(int gtid) {
7777	// This routine is called by all primary threads in teams construct
7778	kmp_info_t *thr = __kmp_threads[gtid];
7779	kmp_team_t *team = thr->th.th_team;
7780	ident_t *loc = team->t.t_ident;
7781	thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7782	KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7783	KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7784	KA_TRACE(`20`, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7785	__kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7786
7787	// This thread is a new CG root. Set up the proper variables.
7788	kmp_cg_root_t tmp = (kmp_cg_root_t )__kmp_allocate(sizeof(kmp_cg_root_t));
7789	tmp->cg_root = thr; // Make thr the CG root
7790	// Init to thread limit stored when league primary threads were forked
7791	tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7792	tmp->cg_nthreads = `1`; // Init counter to one active thread, this one
7793	KA_TRACE(`100`, ("__kmp_teams_master: Thread %p created node %p and init"
7794	" cg_nthreads to 1\n",
7795	thr, tmp));
7796	tmp->up = thr->th.th_cg_roots;
7797	thr->th.th_cg_roots = tmp;
7798
7799	// Launch league of teams now, but not let workers execute
7800	// (they hang on fork barrier until next parallel)
7801	#if INCLUDE_SSC_MARKS
7802	SSC_MARK_FORKING();
7803	#endif
7804	__kmp_fork_call(loc, gtid, call_context: fork_context_intel, argc: team->t.t_argc,
7805	microtask: (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7806	VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7807	#if INCLUDE_SSC_MARKS
7808	SSC_MARK_JOINING();
7809	#endif
7810	// If the team size was reduced from the limit, set it to the new size
7811	if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7812	thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7813	// AC: last parameter "1" eliminates join barrier which won't work because
7814	// worker threads are in a fork barrier waiting for more parallel regions
7815	__kmp_join_call(loc, gtid
7816	#if OMPT_SUPPORT
7817	,
7818	fork_context: fork_context_intel
7819	#endif
7820	,
7821	exit_teams: `1`);
7822	}
7823
7824	int __kmp_invoke_teams_master(int gtid) {
7825	kmp_info_t *this_thr = __kmp_threads[gtid];
7826	kmp_team_t *team = this_thr->th.th_team;
7827	#if KMP_DEBUG
7828	if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7829	KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7830	(void *)__kmp_teams_master);
7831	#endif
7832	__kmp_run_before_invoked_task(gtid, tid: `0`, this_thr, team);
7833	#if OMPT_SUPPORT
7834	int tid = __kmp_tid_from_gtid(gtid);
7835	ompt_data_t *task_data =
7836	&team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7837	ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7838	if (ompt_enabled.ompt_callback_implicit_task) {
7839	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7840	ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7841	ompt_task_initial);
7842	OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7843	}
7844	#endif
7845	__kmp_teams_master(gtid);
7846	#if OMPT_SUPPORT
7847	this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
7848	#endif
7849	__kmp_run_after_invoked_task(gtid, tid: `0`, this_thr, team);
7850	return `1`;
7851	}
7852
7853	/ this sets the requested number of threads for the next parallel region*
7854	encountered by this team. since this should be enclosed in the forkjoin
7855	critical section it should avoid race conditions with asymmetrical nested
7856	parallelism /*
7857	void __kmp_push_num_threads(ident_t id, int* gtid, int num_threads) {
7858	kmp_info_t *thr = __kmp_threads[gtid];
7859
7860	if (num_threads > `0`)
7861	thr->th.th_set_nproc = num_threads;
7862	}
7863
7864	void __kmp_push_num_threads_list(ident_t id, int* gtid, kmp_uint32 list_length,
7865	int *num_threads_list) {
7866	kmp_info_t *thr = __kmp_threads[gtid];
7867
7868	KMP_DEBUG_ASSERT(list_length > `1`);
7869
7870	if (num_threads_list[`0`] > `0`)
7871	thr->th.th_set_nproc = num_threads_list[`0`];
7872	thr->th.th_set_nested_nth =
7873	(int )KMP_INTERNAL_MALLOC(list_length sizeof(int));
7874	for (kmp_uint32 i = `0`; i < list_length; ++i)
7875	thr->th.th_set_nested_nth[i] = num_threads_list[i];
7876	thr->th.th_set_nested_nth_sz = list_length;
7877	}
7878
7879	void __kmp_set_strict_num_threads(ident_t loc, int* gtid, int sev,
7880	const char *msg) {
7881	kmp_info_t *thr = __kmp_threads[gtid];
7882	thr->th.th_nt_strict = true;
7883	thr->th.th_nt_loc = loc;
7884	// if sev is unset make fatal
7885	if (sev == severity_warning)
7886	thr->th.th_nt_sev = sev;
7887	else
7888	thr->th.th_nt_sev = severity_fatal;
7889	// if msg is unset, use an appropriate message
7890	if (msg)
7891	thr->th.th_nt_msg = msg;
7892	else
7893	thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
7894	"strict num_threads clause.";
7895	}
7896
7897	static void __kmp_push_thread_limit(kmp_info_t thr, int* num_teams,
7898	int num_threads) {
7899	KMP_DEBUG_ASSERT(thr);
7900	// Remember the number of threads for inner parallel regions
7901	if (!TCR_4(__kmp_init_middle))
7902	__kmp_middle_initialize(); // get internal globals calculated
7903	__kmp_assign_root_init_mask();
7904	KMP_DEBUG_ASSERT(__kmp_avail_proc);
7905	KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7906
7907	if (num_threads == `0`) {
7908	if (__kmp_teams_thread_limit > `0`) {
7909	num_threads = __kmp_teams_thread_limit;
7910	} else {
7911	num_threads = __kmp_avail_proc / num_teams;
7912	}
7913	// adjust num_threads w/o warning as it is not user setting
7914	// num_threads = min(num_threads, nthreads-var, thread-limit-var)
7915	// no thread_limit clause specified - do not change thread-limit-var ICV
7916	if (num_threads > __kmp_dflt_team_nth) {
7917	num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7918	}
7919	if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7920	num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7921	} // prevent team size to exceed thread-limit-var
7922	if (num_teams * num_threads > __kmp_teams_max_nth) {
7923	num_threads = __kmp_teams_max_nth / num_teams;
7924	}
7925	if (num_threads == `0`) {
7926	num_threads = `1`;
7927	}
7928	} else {
7929	if (num_threads < `0`) {
7930	__kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, `1`),
7931	__kmp_msg_null);
7932	num_threads = `1`;
7933	}
7934	// This thread will be the primary thread of the league primary threads
7935	// Store new thread limit; old limit is saved in th_cg_roots list
7936	thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7937	// num_threads = min(num_threads, nthreads-var)
7938	if (num_threads > __kmp_dflt_team_nth) {
7939	num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
7940	}
7941	if (num_teams * num_threads > __kmp_teams_max_nth) {
7942	int new_threads = __kmp_teams_max_nth / num_teams;
7943	if (new_threads == `0`) {
7944	new_threads = `1`;
7945	}
7946	if (new_threads != num_threads) {
7947	if (!__kmp_reserve_warn) { // user asked for too many threads
7948	__kmp_reserve_warn = `1`; // conflicts with KMP_TEAMS_THREAD_LIMIT
7949	__kmp_msg(kmp_ms_warning,
7950	KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7951	KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7952	}
7953	}
7954	num_threads = new_threads;
7955	}
7956	}
7957	thr->th.th_teams_size.nth = num_threads;
7958	}
7959
7960	/ this sets the requested number of teams for the teams region and/or*
7961	the number of threads for the next parallel region encountered /*
7962	void __kmp_push_num_teams(ident_t id, int* gtid, int num_teams,
7963	int num_threads) {
7964	kmp_info_t *thr = __kmp_threads[gtid];
7965	if (num_teams < `0`) {
7966	// OpenMP specification requires requested values to be positive,
7967	// but people can send us any value, so we'd better check
7968	__kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, `1`),
7969	__kmp_msg_null);
7970	num_teams = `1`;
7971	}
7972	if (num_teams == `0`) {
7973	if (__kmp_nteams > `0`) {
7974	num_teams = __kmp_nteams;
7975	} else {
7976	num_teams = `1`; // default number of teams is 1.
7977	}
7978	}
7979	if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
7980	if (!__kmp_reserve_warn) {
7981	__kmp_reserve_warn = `1`;
7982	__kmp_msg(kmp_ms_warning,
7983	KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7984	KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7985	}
7986	num_teams = __kmp_teams_max_nth;
7987	}
7988	// Set number of teams (number of threads in the outer "parallel" of the
7989	// teams)
7990	thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7991
7992	__kmp_push_thread_limit(thr, num_teams, num_threads);
7993	}
7994
7995	/ This sets the requested number of teams for the teams region and/or*
7996	the number of threads for the next parallel region encountered /*
7997	void __kmp_push_num_teams_51(ident_t id, int* gtid, int num_teams_lb,
7998	int num_teams_ub, int num_threads) {
7999	kmp_info_t *thr = __kmp_threads[gtid];
8000	KMP_DEBUG_ASSERT(num_teams_lb >= `0` && num_teams_ub >= `0`);
8001	KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
8002	KMP_DEBUG_ASSERT(num_threads >= `0`);
8003
8004	if (num_teams_lb > num_teams_ub) {
8005	__kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
8006	KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
8007	}
8008
8009	int num_teams = `1`; // defalt number of teams is 1.
8010
8011	if (num_teams_lb == `0` && num_teams_ub > `0`)
8012	num_teams_lb = num_teams_ub;
8013
8014	if (num_teams_lb == `0` && num_teams_ub == `0`) { // no num_teams clause
8015	num_teams = (__kmp_nteams > `0`) ? __kmp_nteams : num_teams;
8016	if (num_teams > __kmp_teams_max_nth) {
8017	if (!__kmp_reserve_warn) {
8018	__kmp_reserve_warn = `1`;
8019	__kmp_msg(kmp_ms_warning,
8020	KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
8021	KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
8022	}
8023	num_teams = __kmp_teams_max_nth;
8024	}
8025	} else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
8026	num_teams = num_teams_ub;
8027	} else { // num_teams_lb <= num_teams <= num_teams_ub
8028	if (num_threads <= `0`) {
8029	if (num_teams_ub > __kmp_teams_max_nth) {
8030	num_teams = num_teams_lb;
8031	} else {
8032	num_teams = num_teams_ub;
8033	}
8034	} else {
8035	num_teams = (num_threads > __kmp_teams_max_nth)
8036	? num_teams
8037	: __kmp_teams_max_nth / num_threads;
8038	if (num_teams < num_teams_lb) {
8039	num_teams = num_teams_lb;
8040	} else if (num_teams > num_teams_ub) {
8041	num_teams = num_teams_ub;
8042	}
8043	}
8044	}
8045	// Set number of teams (number of threads in the outer "parallel" of the
8046	// teams)
8047	thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8048
8049	__kmp_push_thread_limit(thr, num_teams, num_threads);
8050	}
8051
8052	// Set the proc_bind var to use in the following parallel region.
8053	void __kmp_push_proc_bind(ident_t id, int* gtid, kmp_proc_bind_t proc_bind) {
8054	kmp_info_t *thr = __kmp_threads[gtid];
8055	thr->th.th_set_proc_bind = proc_bind;
8056	}
8057
8058	/ Launch the worker threads into the microtask. /
8059
8060	void __kmp_internal_fork(ident_t id, int* gtid, kmp_team_t *team) {
8061	kmp_info_t *this_thr = __kmp_threads[gtid];
8062
8063	#ifdef KMP_DEBUG
8064	int f;
8065	#endif /* KMP_DEBUG */
8066
8067	KMP_DEBUG_ASSERT(team);
8068	KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8069	KMP_ASSERT(KMP_MASTER_GTID(gtid));
8070	KMP_MB(); / Flush all pending memory write invalidates. /
8071
8072	team->t.t_construct = `0`; / no single directives seen yet /
8073	team->t.t_ordered.dt.t_value =
8074	`0`; / thread 0 enters the ordered section first /
8075
8076	/ Reset the identifiers on the dispatch buffer /
8077	KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8078	if (team->t.t_max_nproc > `1`) {
8079	int i;
8080	for (i = `0`; i < __kmp_dispatch_num_buffers; ++i) {
8081	team->t.t_disp_buffer[i].buffer_index = i;
8082	team->t.t_disp_buffer[i].doacross_buf_idx = i;
8083	}
8084	} else {
8085	team->t.t_disp_buffer[`0`].buffer_index = `0`;
8086	team->t.t_disp_buffer[`0`].doacross_buf_idx = `0`;
8087	}
8088
8089	KMP_MB(); / Flush all pending memory write invalidates. /
8090	KMP_ASSERT(this_thr->th.th_team == team);
8091
8092	#ifdef KMP_DEBUG
8093	for (f = `0`; f < team->t.t_nproc; f++) {
8094	KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8095	team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8096	}
8097	#endif /* KMP_DEBUG */
8098
8099	/ release the worker threads so they may begin working /
8100	__kmp_fork_barrier(gtid, tid: `0`);
8101	}
8102
8103	void __kmp_internal_join(ident_t id, int* gtid, kmp_team_t *team) {
8104	kmp_info_t *this_thr = __kmp_threads[gtid];
8105
8106	KMP_DEBUG_ASSERT(team);
8107	KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8108	KMP_ASSERT(KMP_MASTER_GTID(gtid));
8109	KMP_MB(); / Flush all pending memory write invalidates. /
8110
8111	/ Join barrier after fork /
8112
8113	#ifdef KMP_DEBUG
8114	if (__kmp_threads[gtid] &&
8115	__kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8116	__kmp_printf(format: "GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8117	__kmp_threads[gtid]);
8118	__kmp_printf(format: "__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8119	"team->t.t_nproc=%d\n",
8120	gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8121	team->t.t_nproc);
8122	__kmp_print_structure();
8123	}
8124	KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8125	__kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8126	#endif /* KMP_DEBUG */
8127
8128	__kmp_join_barrier(gtid); / wait for everyone /
8129	#if OMPT_SUPPORT
8130	ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
8131	if (ompt_enabled.enabled &&
8132	(ompt_state == ompt_state_wait_barrier_teams \|\|
8133	ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
8134	int ds_tid = this_thr->th.th_info.ds.ds_tid;
8135	ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8136	this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8137	#if OMPT_OPTIONAL
8138	void *codeptr = NULL;
8139	if (KMP_MASTER_TID(ds_tid) &&
8140	(ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) \|\|
8141	ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8142	codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8143
8144	ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
8145	if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
8146	sync_kind = ompt_sync_region_barrier_teams;
8147	if (ompt_enabled.ompt_callback_sync_region_wait) {
8148	ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8149	sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8150	}
8151	if (ompt_enabled.ompt_callback_sync_region) {
8152	ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8153	sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8154	}
8155	#endif
8156	if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8157	ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8158	ompt_scope_end, NULL, task_data, `0`, ds_tid,
8159	ompt_task_implicit); // TODO: Can this be ompt_task_initial?
8160	}
8161	}
8162	#endif
8163
8164	KMP_MB(); / Flush all pending memory write invalidates. /
8165	KMP_ASSERT(this_thr->th.th_team == team);
8166	}
8167
8168	/ ------------------------------------------------------------------------ /
8169
8170	#ifdef USE_LOAD_BALANCE
8171
8172	// Return the worker threads actively spinning in the hot team, if we
8173	// are at the outermost level of parallelism. Otherwise, return 0.
8174	static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8175	int i;
8176	int retval;
8177	kmp_team_t *hot_team;
8178
8179	if (root->r.r_active) {
8180	return `0`;
8181	}
8182	hot_team = root->r.r_hot_team;
8183	if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8184	return hot_team->t.t_nproc - `1`; // Don't count primary thread
8185	}
8186
8187	// Skip the primary thread - it is accounted for elsewhere.
8188	retval = `0`;
8189	for (i = `1`; i < hot_team->t.t_nproc; i++) {
8190	if (hot_team->t.t_threads[i]->th.th_active) {
8191	retval++;
8192	}
8193	}
8194	return retval;
8195	}
8196
8197	// Perform an automatic adjustment to the number of
8198	// threads used by the next parallel region.
8199	static int __kmp_load_balance_nproc(kmp_root_t root, int* set_nproc) {
8200	int retval;
8201	int pool_active;
8202	int hot_team_active;
8203	int team_curr_active;
8204	int system_active;
8205
8206	KB_TRACE(`20`, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8207	set_nproc));
8208	KMP_DEBUG_ASSERT(root);
8209	KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[`0`]
8210	->th.th_current_task->td_icvs.dynamic == TRUE);
8211	KMP_DEBUG_ASSERT(set_nproc > `1`);
8212
8213	if (set_nproc == `1`) {
8214	KB_TRACE(`20`, ("__kmp_load_balance_nproc: serial execution.\n"));
8215	return `1`;
8216	}
8217
8218	// Threads that are active in the thread pool, active in the hot team for this
8219	// particular root (if we are at the outer par level), and the currently
8220	// executing thread (to become the primary thread) are available to add to the
8221	// new team, but are currently contributing to the system load, and must be
8222	// accounted for.
8223	pool_active = __kmp_thread_pool_active_nth;
8224	hot_team_active = __kmp_active_hot_team_nproc(root);
8225	team_curr_active = pool_active + hot_team_active + `1`;
8226
8227	// Check the system load.
8228	system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8229	KB_TRACE(`30`, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8230	"hot team active = %d\n",
8231	system_active, pool_active, hot_team_active));
8232
8233	if (system_active < `0`) {
8234	// There was an error reading the necessary info from /proc, so use the
8235	// thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
8236	// = dynamic_thread_limit, we shouldn't wind up getting back here.
8237	__kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8238	KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8239
8240	// Make this call behave like the thread limit algorithm.
8241	retval = __kmp_avail_proc - __kmp_nth +
8242	(root->r.r_active ? `1` : root->r.r_hot_team->t.t_nproc);
8243	if (retval > set_nproc) {
8244	retval = set_nproc;
8245	}
8246	if (retval < KMP_MIN_NTH) {
8247	retval = KMP_MIN_NTH;
8248	}
8249
8250	KB_TRACE(`20`, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8251	retval));
8252	return retval;
8253	}
8254
8255	// There is a slight delay in the load balance algorithm in detecting new
8256	// running procs. The real system load at this instant should be at least as
8257	// large as the #active omp thread that are available to add to the team.
8258	if (system_active < team_curr_active) {
8259	system_active = team_curr_active;
8260	}
8261	retval = __kmp_avail_proc - system_active + team_curr_active;
8262	if (retval > set_nproc) {
8263	retval = set_nproc;
8264	}
8265	if (retval < KMP_MIN_NTH) {
8266	retval = KMP_MIN_NTH;
8267	}
8268
8269	KB_TRACE(`20`, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8270	return retval;
8271	} // __kmp_load_balance_nproc()
8272
8273	#endif /* USE_LOAD_BALANCE */
8274
8275	/ ------------------------------------------------------------------------ /
8276
8277	/ NOTE: this is called with the __kmp_init_lock held /
8278	void __kmp_cleanup(void) {
8279	int f;
8280
8281	KA_TRACE(`10`, ("__kmp_cleanup: enter\n"));
8282
8283	if (TCR_4(__kmp_init_parallel)) {
8284	#if KMP_HANDLE_SIGNALS
8285	__kmp_remove_signals();
8286	#endif
8287	TCW_4(__kmp_init_parallel, FALSE);
8288	}
8289
8290	if (TCR_4(__kmp_init_middle)) {
8291	#if KMP_AFFINITY_SUPPORTED
8292	__kmp_affinity_uninitialize();
8293	#endif /* KMP_AFFINITY_SUPPORTED */
8294	__kmp_cleanup_hierarchy();
8295	TCW_4(__kmp_init_middle, FALSE);
8296	}
8297
8298	KA_TRACE(`10`, ("__kmp_cleanup: go serial cleanup\n"));
8299
8300	if (__kmp_init_serial) {
8301	__kmp_runtime_destroy();
8302	__kmp_init_serial = FALSE;
8303	}
8304
8305	__kmp_cleanup_threadprivate_caches();
8306
8307	for (f = `0`; f < __kmp_threads_capacity; f++) {
8308	if (__kmp_root[f] != NULL) {
8309	__kmp_free(__kmp_root[f]);
8310	__kmp_root[f] = NULL;
8311	}
8312	}
8313	__kmp_free(__kmp_threads);
8314	// __kmp_threads and __kmp_root were allocated at once, as single block, so
8315	// there is no need in freeing __kmp_root.
8316	__kmp_threads = NULL;
8317	__kmp_root = NULL;
8318	__kmp_threads_capacity = `0`;
8319
8320	// Free old __kmp_threads arrays if they exist.
8321	kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8322	while (ptr) {
8323	kmp_old_threads_list_t *next = ptr->next;
8324	__kmp_free(ptr->threads);
8325	__kmp_free(ptr);
8326	ptr = next;
8327	}
8328
8329	#if KMP_USE_DYNAMIC_LOCK
8330	__kmp_cleanup_indirect_user_locks();
8331	#else
8332	__kmp_cleanup_user_locks();
8333	#endif
8334	#if OMPD_SUPPORT
8335	if (ompd_state) {
8336	__kmp_free(ompd_env_block);
8337	ompd_env_block = NULL;
8338	ompd_env_block_size = `0`;
8339	}
8340	#endif
8341
8342	#if KMP_AFFINITY_SUPPORTED
8343	KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8344	__kmp_cpuinfo_file = NULL;
8345	#endif /* KMP_AFFINITY_SUPPORTED */
8346
8347	#if KMP_USE_ADAPTIVE_LOCKS
8348	#if KMP_DEBUG_ADAPTIVE_LOCKS
8349	__kmp_print_speculative_stats();
8350	#endif
8351	#endif
8352	KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8353	__kmp_nested_nth.nth = NULL;
8354	__kmp_nested_nth.size = `0`;
8355	__kmp_nested_nth.used = `0`;
8356
8357	KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8358	__kmp_nested_proc_bind.bind_types = NULL;
8359	__kmp_nested_proc_bind.size = `0`;
8360	__kmp_nested_proc_bind.used = `0`;
8361	if (__kmp_affinity_format) {
8362	KMP_INTERNAL_FREE(__kmp_affinity_format);
8363	__kmp_affinity_format = NULL;
8364	}
8365
8366	__kmp_i18n_catclose();
8367
8368	#if KMP_USE_HIER_SCHED
8369	__kmp_hier_scheds.deallocate();
8370	#endif
8371
8372	#if KMP_STATS_ENABLED
8373	__kmp_stats_fini();
8374	#endif
8375
8376	KA_TRACE(`10`, ("__kmp_cleanup: exit\n"));
8377	}
8378
8379	/ ------------------------------------------------------------------------ /
8380
8381	int __kmp_ignore_mppbeg(void) {
8382	char *env;
8383
8384	if ((env = getenv(name: "KMP_IGNORE_MPPBEG")) != NULL) {
8385	if (__kmp_str_match_false(data: env))
8386	return FALSE;
8387	}
8388	// By default __kmpc_begin() is no-op.
8389	return TRUE;
8390	}
8391
8392	int __kmp_ignore_mppend(void) {
8393	char *env;
8394
8395	if ((env = getenv(name: "KMP_IGNORE_MPPEND")) != NULL) {
8396	if (__kmp_str_match_false(data: env))
8397	return FALSE;
8398	}
8399	// By default __kmpc_end() is no-op.
8400	return TRUE;
8401	}
8402
8403	void __kmp_internal_begin(void) {
8404	int gtid;
8405	kmp_root_t *root;
8406
8407	/ this is a very important step as it will register new sibling threads*
8408	and assign these new uber threads a new gtid /*
8409	gtid = __kmp_entry_gtid();
8410	root = __kmp_threads[gtid]->th.th_root;
8411	KMP_ASSERT(KMP_UBER_GTID(gtid));
8412
8413	if (root->r.r_begin)
8414	return;
8415	__kmp_acquire_lock(lck: &root->r.r_begin_lock, gtid);
8416	if (root->r.r_begin) {
8417	__kmp_release_lock(lck: &root->r.r_begin_lock, gtid);
8418	return;
8419	}
8420
8421	root->r.r_begin = TRUE;
8422
8423	__kmp_release_lock(lck: &root->r.r_begin_lock, gtid);
8424	}
8425
8426	/ ------------------------------------------------------------------------ /
8427
8428	void __kmp_user_set_library(enum library_type arg) {
8429	int gtid;
8430	kmp_root_t *root;
8431	kmp_info_t *thread;
8432
8433	/ first, make sure we are initialized so we can get our gtid /
8434
8435	gtid = __kmp_entry_gtid();
8436	thread = __kmp_threads[gtid];
8437
8438	root = thread->th.th_root;
8439
8440	KA_TRACE(`20`, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8441	library_serial));
8442	if (root->r.r_in_parallel) { / Must be called in serial section of top-level*
8443	thread /*
8444	KMP_WARNING(SetLibraryIncorrectCall);
8445	return;
8446	}
8447
8448	switch (arg) {
8449	case library_serial:
8450	thread->th.th_set_nproc = `0`;
8451	set__nproc(thread, `1`);
8452	break;
8453	case library_turnaround:
8454	thread->th.th_set_nproc = `0`;
8455	set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8456	: __kmp_dflt_team_nth_ub);
8457	break;
8458	case library_throughput:
8459	thread->th.th_set_nproc = `0`;
8460	set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8461	: __kmp_dflt_team_nth_ub);
8462	break;
8463	default:
8464	KMP_FATAL(UnknownLibraryType, arg);
8465	}
8466
8467	__kmp_aux_set_library(arg);
8468	}
8469
8470	void __kmp_aux_set_stacksize(size_t arg) {
8471	if (!__kmp_init_serial)
8472	__kmp_serial_initialize();
8473
8474	#if KMP_OS_DARWIN
8475	if (arg & (`0x1000` - `1`)) {
8476	arg &= ~(`0x1000` - `1`);
8477	if (arg + `0x1000`) / check for overflow if we round up /
8478	arg += `0x1000`;
8479	}
8480	#endif
8481	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
8482
8483	/ only change the default stacksize before the first parallel region /
8484	if (!TCR_4(__kmp_init_parallel)) {
8485	size_t value = arg; / argument is in bytes /
8486
8487	if (value < __kmp_sys_min_stksize)
8488	value = __kmp_sys_min_stksize;
8489	else if (value > KMP_MAX_STKSIZE)
8490	value = KMP_MAX_STKSIZE;
8491
8492	__kmp_stksize = value;
8493
8494	__kmp_env_stksize = TRUE; / was KMP_STACKSIZE specified? /
8495	}
8496
8497	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
8498	}
8499
8500	/ set the behaviour of the runtime library /
8501	/ TODO this can cause some odd behaviour with sibling parallelism... /
8502	void __kmp_aux_set_library(enum library_type arg) {
8503	__kmp_library = arg;
8504
8505	switch (__kmp_library) {
8506	case library_serial: {
8507	KMP_INFORM(LibraryIsSerial);
8508	} break;
8509	case library_turnaround:
8510	if (__kmp_use_yield == `1` && !__kmp_use_yield_exp_set)
8511	__kmp_use_yield = `2`; // only yield when oversubscribed
8512	break;
8513	case library_throughput:
8514	if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8515	__kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8516	break;
8517	default:
8518	KMP_FATAL(UnknownLibraryType, arg);
8519	}
8520	}
8521
8522	/ Getting team information common for all team API /
8523	// Returns NULL if not in teams construct
8524	static kmp_team_t __kmp_aux_get_team_info(int* &teams_serialized) {
8525	kmp_info_t *thr = __kmp_entry_thread();
8526	teams_serialized = `0`;
8527	if (thr->th.th_teams_microtask) {
8528	kmp_team_t *team = thr->th.th_team;
8529	int tlevel = thr->th.th_teams_level; // the level of the teams construct
8530	int ii = team->t.t_level;
8531	teams_serialized = team->t.t_serialized;
8532	int level = tlevel + `1`;
8533	KMP_DEBUG_ASSERT(ii >= tlevel);
8534	while (ii > level) {
8535	for (teams_serialized = team->t.t_serialized;
8536	(teams_serialized > `0`) && (ii > level); teams_serialized--, ii--) {
8537	}
8538	if (team->t.t_serialized && (!teams_serialized)) {
8539	team = team->t.t_parent;
8540	continue;
8541	}
8542	if (ii > level) {
8543	team = team->t.t_parent;
8544	ii--;
8545	}
8546	}
8547	return team;
8548	}
8549	return NULL;
8550	}
8551
8552	int __kmp_aux_get_team_num() {
8553	int serialized;
8554	kmp_team_t *team = __kmp_aux_get_team_info(teams_serialized&: serialized);
8555	if (team) {
8556	if (serialized > `1`) {
8557	return `0`; // teams region is serialized ( 1 team of 1 thread ).
8558	} else {
8559	return team->t.t_master_tid;
8560	}
8561	}
8562	return `0`;
8563	}
8564
8565	int __kmp_aux_get_num_teams() {
8566	int serialized;
8567	kmp_team_t *team = __kmp_aux_get_team_info(teams_serialized&: serialized);
8568	if (team) {
8569	if (serialized > `1`) {
8570	return `1`;
8571	} else {
8572	return team->t.t_parent->t.t_nproc;
8573	}
8574	}
8575	return `1`;
8576	}
8577
8578	/ ------------------------------------------------------------------------ /
8579
8580	/*
8581	* Affinity Format Parser
8582	*
8583	* Field is in form of: %[[[0].]size]type
8584	* % and type are required (%% means print a literal '%')
8585	* type is either single char or long name surrounded by {},
8586	* e.g., N or {num_threads}
8587	* 0 => leading zeros
8588	* . => right justified when size is specified
8589	* by default output is left justified
8590	* size is the minimum field length
8591	* All other characters are printed as is
8592	*
8593	* Available field types:
8594	* L {thread_level} - omp_get_level()
8595	* n {thread_num} - omp_get_thread_num()
8596	* h {host} - name of host machine
8597	* P {process_id} - process id (integer)
8598	* T {thread_identifier} - native thread identifier (integer)
8599	* N {num_threads} - omp_get_num_threads()
8600	* A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
8601	* a {thread_affinity} - comma separated list of integers or integer ranges
8602	* (values of affinity mask)
8603	*
8604	* Implementation-specific field types can be added
8605	* If a type is unknown, print "undefined"
8606	*/
8607
8608	// Structure holding the short name, long name, and corresponding data type
8609	// for snprintf. A table of these will represent the entire valid keyword
8610	// field types.
8611	typedef struct kmp_affinity_format_field_t {
8612	char short_name; // from spec e.g., L -> thread level
8613	const char long_name; // from spec thread_level -> thread level*
8614	char field_format; // data type for snprintf (typically 'd' or 's'
8615	// for integer or string)
8616	} kmp_affinity_format_field_t;
8617
8618	static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8619	#if KMP_AFFINITY_SUPPORTED
8620	{.short_name: `'A'`, .long_name: "thread_affinity", .field_format: `'s'`},
8621	#endif
8622	{.short_name: `'t'`, .long_name: "team_num", .field_format: `'d'`},
8623	{.short_name: `'T'`, .long_name: "num_teams", .field_format: `'d'`},
8624	{.short_name: `'L'`, .long_name: "nesting_level", .field_format: `'d'`},
8625	{.short_name: `'n'`, .long_name: "thread_num", .field_format: `'d'`},
8626	{.short_name: `'N'`, .long_name: "num_threads", .field_format: `'d'`},
8627	{.short_name: `'a'`, .long_name: "ancestor_tnum", .field_format: `'d'`},
8628	{.short_name: `'H'`, .long_name: "host", .field_format: `'s'`},
8629	{.short_name: `'P'`, .long_name: "process_id", .field_format: `'d'`},
8630	{.short_name: `'i'`, .long_name: "native_thread_id", .field_format: `'d'`}};
8631
8632	// Return the number of characters it takes to hold field
8633	static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8634	const char **ptr,
8635	kmp_str_buf_t *field_buffer) {
8636	int rc, format_index, field_value;
8637	const char width_left, width_right;
8638	bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8639	static const int FORMAT_SIZE = `20`;
8640	char format[FORMAT_SIZE] = {`0`};
8641	char absolute_short_name = `0`;
8642
8643	KMP_DEBUG_ASSERT(gtid >= `0`);
8644	KMP_DEBUG_ASSERT(th);
8645	KMP_DEBUG_ASSERT(**ptr == `'%'`);
8646	KMP_DEBUG_ASSERT(field_buffer);
8647
8648	__kmp_str_buf_clear(buffer: field_buffer);
8649
8650	// Skip the initial %
8651	(*ptr)++;
8652
8653	// Check for %% first
8654	if (**ptr == `'%'`) {
8655	__kmp_str_buf_cat(buffer: field_buffer, str: "%", len: `1`);
8656	(ptr)++; // skip over the second %*
8657	return `1`;
8658	}
8659
8660	// Parse field modifiers if they are present
8661	pad_zeros = false;
8662	if (**ptr == `'0'`) {
8663	pad_zeros = true;
8664	(ptr)++; // skip over 0*
8665	}
8666	right_justify = false;
8667	if (**ptr == `'.'`) {
8668	right_justify = true;
8669	(ptr)++; // skip over .*
8670	}
8671	// Parse width of field: [width_left, width_right)
8672	width_left = width_right = NULL;
8673	if (ptr >= `'0'` && ptr <= `'9'`) {
8674	width_left = *ptr;
8675	SKIP_DIGITS(*ptr);
8676	width_right = *ptr;
8677	}
8678
8679	// Create the format for KMP_SNPRINTF based on flags parsed above
8680	format_index = `0`;
8681	format[format_index++] = `'%'`;
8682	if (!right_justify)
8683	format[format_index++] = `'-'`;
8684	if (pad_zeros)
8685	format[format_index++] = `'0'`;
8686	if (width_left && width_right) {
8687	int i = `0`;
8688	// Only allow 8 digit number widths.
8689	// This also prevents overflowing format variable
8690	while (i < `8` && width_left < width_right) {
8691	format[format_index++] = *width_left;
8692	width_left++;
8693	i++;
8694	}
8695	}
8696
8697	// Parse a name (long or short)
8698	// Canonicalize the name into absolute_short_name
8699	found_valid_name = false;
8700	parse_long_name = (**ptr == `'{'`);
8701	if (parse_long_name)
8702	(ptr)++; // skip initial left brace*
8703	for (size_t i = `0`; i < sizeof(__kmp_affinity_format_table) /
8704	sizeof(__kmp_affinity_format_table[`0`]);
8705	++i) {
8706	char short_name = __kmp_affinity_format_table[i].short_name;
8707	const char *long_name = __kmp_affinity_format_table[i].long_name;
8708	char field_format = __kmp_affinity_format_table[i].field_format;
8709	if (parse_long_name) {
8710	size_t length = KMP_STRLEN(s: long_name);
8711	if (strncmp(s1: *ptr, s2: long_name, n: length) == `0`) {
8712	found_valid_name = true;
8713	(ptr) += length; // skip the long name*
8714	}
8715	} else if (**ptr == short_name) {
8716	found_valid_name = true;
8717	(ptr)++; // skip the short name*
8718	}
8719	if (found_valid_name) {
8720	format[format_index++] = field_format;
8721	format[format_index++] = `'\0'`;
8722	absolute_short_name = short_name;
8723	break;
8724	}
8725	}
8726	if (parse_long_name) {
8727	if (**ptr != `'}'`) {
8728	absolute_short_name = `0`;
8729	} else {
8730	(ptr)++; // skip over the right brace*
8731	}
8732	}
8733
8734	// Attempt to fill the buffer with the requested
8735	// value using snprintf within __kmp_str_buf_print()
8736	switch (absolute_short_name) {
8737	case `'t'`:
8738	rc = __kmp_str_buf_print(buffer: field_buffer, format, __kmp_aux_get_team_num());
8739	break;
8740	case `'T'`:
8741	rc = __kmp_str_buf_print(buffer: field_buffer, format, __kmp_aux_get_num_teams());
8742	break;
8743	case `'L'`:
8744	rc = __kmp_str_buf_print(buffer: field_buffer, format, th->th.th_team->t.t_level);
8745	break;
8746	case `'n'`:
8747	rc = __kmp_str_buf_print(buffer: field_buffer, format, __kmp_tid_from_gtid(gtid));
8748	break;
8749	case `'H'`: {
8750	static const int BUFFER_SIZE = `256`;
8751	char buf[BUFFER_SIZE];
8752	__kmp_expand_host_name(buffer: buf, size: BUFFER_SIZE);
8753	rc = __kmp_str_buf_print(buffer: field_buffer, format, buf);
8754	} break;
8755	case `'P'`:
8756	rc = __kmp_str_buf_print(buffer: field_buffer, format, getpid());
8757	break;
8758	case `'i'`:
8759	rc = __kmp_str_buf_print(buffer: field_buffer, format, __kmp_gettid());
8760	break;
8761	case `'N'`:
8762	rc = __kmp_str_buf_print(buffer: field_buffer, format, th->th.th_team->t.t_nproc);
8763	break;
8764	case `'a'`:
8765	field_value =
8766	__kmp_get_ancestor_thread_num(gtid, level: th->th.th_team->t.t_level - `1`);
8767	rc = __kmp_str_buf_print(buffer: field_buffer, format, field_value);
8768	break;
8769	#if KMP_AFFINITY_SUPPORTED
8770	case `'A'`: {
8771	kmp_str_buf_t buf;
8772	__kmp_str_buf_init(&buf);
8773	__kmp_affinity_str_buf_mask(buf: &buf, mask: th->th.th_affin_mask);
8774	rc = __kmp_str_buf_print(buffer: field_buffer, format, buf.str);
8775	__kmp_str_buf_free(buffer: &buf);
8776	} break;
8777	#endif
8778	default:
8779	// According to spec, If an implementation does not have info for field
8780	// type, then "undefined" is printed
8781	rc = __kmp_str_buf_print(buffer: field_buffer, format: "%s", "undefined");
8782	// Skip the field
8783	if (parse_long_name) {
8784	SKIP_TOKEN(*ptr);
8785	if (**ptr == `'}'`)
8786	(*ptr)++;
8787	} else {
8788	(*ptr)++;
8789	}
8790	}
8791
8792	KMP_ASSERT(format_index <= FORMAT_SIZE);
8793	return rc;
8794	}
8795
8796	/*
8797	* Return number of characters needed to hold the affinity string
8798	* (not including null byte character)
8799	* The resultant string is printed to buffer, which the caller can then
8800	* handle afterwards
8801	*/
8802	size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8803	kmp_str_buf_t *buffer) {
8804	const char *parse_ptr;
8805	size_t retval;
8806	const kmp_info_t *th;
8807	kmp_str_buf_t field;
8808
8809	KMP_DEBUG_ASSERT(buffer);
8810	KMP_DEBUG_ASSERT(gtid >= `0`);
8811
8812	__kmp_str_buf_init(&field);
8813	__kmp_str_buf_clear(buffer);
8814
8815	th = __kmp_threads[gtid];
8816	retval = `0`;
8817
8818	// If format is NULL or zero-length string, then we use
8819	// affinity-format-var ICV
8820	parse_ptr = format;
8821	if (parse_ptr == NULL \|\| *parse_ptr == `'\0'`) {
8822	parse_ptr = __kmp_affinity_format;
8823	}
8824	KMP_DEBUG_ASSERT(parse_ptr);
8825
8826	while (*parse_ptr != `'\0'`) {
8827	// Parse a field
8828	if (*parse_ptr == `'%'`) {
8829	// Put field in the buffer
8830	int rc = __kmp_aux_capture_affinity_field(gtid, th, ptr: &parse_ptr, field_buffer: &field);
8831	__kmp_str_buf_catbuf(dest: buffer, src: &field);
8832	retval += rc;
8833	} else {
8834	// Put literal character in buffer
8835	__kmp_str_buf_cat(buffer, str: parse_ptr, len: `1`);
8836	retval++;
8837	parse_ptr++;
8838	}
8839	}
8840	__kmp_str_buf_free(buffer: &field);
8841	return retval;
8842	}
8843
8844	// Displays the affinity string to stdout
8845	void __kmp_aux_display_affinity(int gtid, const char *format) {
8846	kmp_str_buf_t buf;
8847	__kmp_str_buf_init(&buf);
8848	__kmp_aux_capture_affinity(gtid, format, buffer: &buf);
8849	__kmp_fprintf(stream: kmp_out, format: "%s" KMP_END_OF_LINE, buf.str);
8850	__kmp_str_buf_free(buffer: &buf);
8851	}
8852
8853	/ ------------------------------------------------------------------------ /
8854	void __kmp_aux_set_blocktime(int arg, kmp_info_t thread, int* tid) {
8855	int blocktime = arg; / argument is in microseconds /
8856	#if KMP_USE_MONITOR
8857	int bt_intervals;
8858	#endif
8859	kmp_int8 bt_set;
8860
8861	__kmp_save_internal_controls(thread);
8862
8863	/ Normalize and set blocktime for the teams /
8864	if (blocktime < KMP_MIN_BLOCKTIME)
8865	blocktime = KMP_MIN_BLOCKTIME;
8866	else if (blocktime > KMP_MAX_BLOCKTIME)
8867	blocktime = KMP_MAX_BLOCKTIME;
8868
8869	set__blocktime_team(thread->th.th_team, tid, blocktime);
8870	set__blocktime_team(thread->th.th_serial_team, `0`, blocktime);
8871
8872	#if KMP_USE_MONITOR
8873	/ Calculate and set blocktime intervals for the teams /
8874	bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8875
8876	set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8877	set__bt_intervals_team(thread->th.th_serial_team, `0`, bt_intervals);
8878	#endif
8879
8880	/ Set whether blocktime has been set to "TRUE" /
8881	bt_set = TRUE;
8882
8883	set__bt_set_team(thread->th.th_team, tid, bt_set);
8884	set__bt_set_team(thread->th.th_serial_team, `0`, bt_set);
8885	#if KMP_USE_MONITOR
8886	KF_TRACE(`10`, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8887	"bt_intervals=%d, monitor_updates=%d\n",
8888	__kmp_gtid_from_tid(tid, thread->th.th_team),
8889	thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8890	__kmp_monitor_wakeups));
8891	#else
8892	KF_TRACE(`10`, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8893	__kmp_gtid_from_tid(tid, thread->th.th_team),
8894	thread->th.th_team->t.t_id, tid, blocktime));
8895	#endif
8896	}
8897
8898	void __kmp_aux_set_defaults(char const *str, size_t len) {
8899	if (!__kmp_init_serial) {
8900	__kmp_serial_initialize();
8901	}
8902	__kmp_env_initialize(str);
8903
8904	if (__kmp_settings \|\| __kmp_display_env \|\| __kmp_display_env_verbose) {
8905	__kmp_env_print();
8906	}
8907	} // __kmp_aux_set_defaults
8908
8909	/ ------------------------------------------------------------------------ /
8910	/ internal fast reduction routines /
8911
8912	PACKED_REDUCTION_METHOD_T
8913	__kmp_determine_reduction_method(
8914	ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8915	void reduce_data, void* (reduce_func)(void* lhs_data, void* *rhs_data),
8916	kmp_critical_name *lck) {
8917
8918	// Default reduction method: critical construct ( lck != NULL, like in current
8919	// PAROPT )
8920	// If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8921	// can be selected by RTL
8922	// If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8923	// can be selected by RTL
8924	// Finally, it's up to OpenMP RTL to make a decision on which method to select
8925	// among generated by PAROPT.
8926
8927	PACKED_REDUCTION_METHOD_T retval;
8928
8929	int team_size;
8930
8931	KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
8932
8933	#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8934	(loc && \
8935	((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8936	#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8937
8938	retval = critical_reduce_block;
8939
8940	// another choice of getting a team size (with 1 dynamic deference) is slower
8941	team_size = __kmp_get_team_num_threads(global_tid);
8942	if (team_size == `1`) {
8943
8944	retval = empty_reduce_block;
8945
8946	} else {
8947
8948	int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8949
8950	#if KMP_ARCH_X86_64 \|\| KMP_ARCH_PPC64 \|\| KMP_ARCH_AARCH64 \|\| \
8951	KMP_ARCH_MIPS64 \|\| KMP_ARCH_RISCV64 \|\| KMP_ARCH_LOONGARCH64 \|\| \
8952	KMP_ARCH_VE \|\| KMP_ARCH_S390X \|\| KMP_ARCH_WASM
8953
8954	#if KMP_OS_LINUX \|\| KMP_OS_DRAGONFLY \|\| KMP_OS_FREEBSD \|\| KMP_OS_NETBSD \|\| \
8955	KMP_OS_OPENBSD \|\| KMP_OS_WINDOWS \|\| KMP_OS_DARWIN \|\| KMP_OS_HAIKU \|\| \
8956	KMP_OS_HURD \|\| KMP_OS_SOLARIS \|\| KMP_OS_WASI \|\| KMP_OS_AIX
8957
8958	int teamsize_cutoff = `4`;
8959
8960	#if KMP_MIC_SUPPORTED
8961	if (__kmp_mic_type != non_mic) {
8962	teamsize_cutoff = `8`;
8963	}
8964	#endif
8965	int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8966	if (tree_available) {
8967	if (team_size <= teamsize_cutoff) {
8968	if (atomic_available) {
8969	retval = atomic_reduce_block;
8970	}
8971	} else {
8972	retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8973	}
8974	} else if (atomic_available) {
8975	retval = atomic_reduce_block;
8976	}
8977	#else
8978	#error "Unknown or unsupported OS"
8979	#endif // KMP_OS_LINUX \|\| KMP_OS_DRAGONFLY \|\| KMP_OS_FREEBSD \|\| KMP_OS_NETBSD \|\|
8980	// KMP_OS_OPENBSD \|\| KMP_OS_WINDOWS \|\| KMP_OS_DARWIN \|\| KMP_OS_HAIKU \|\|
8981	// KMP_OS_HURD \|\| KMP_OS_SOLARIS \|\| KMP_OS_WASI \|\| KMP_OS_AIX
8982
8983	#elif KMP_ARCH_X86 \|\| KMP_ARCH_ARM \|\| KMP_ARCH_AARCH \|\| KMP_ARCH_MIPS \|\| \
8984	KMP_ARCH_WASM \|\| KMP_ARCH_PPC \|\| KMP_ARCH_AARCH64_32 \|\| KMP_ARCH_SPARC
8985
8986	#if KMP_OS_LINUX \|\| KMP_OS_DRAGONFLY \|\| KMP_OS_FREEBSD \|\| KMP_OS_NETBSD \|\| \
8987	KMP_OS_OPENBSD \|\| KMP_OS_WINDOWS \|\| KMP_OS_HAIKU \|\| KMP_OS_HURD \|\| \
8988	KMP_OS_SOLARIS \|\| KMP_OS_WASI \|\| KMP_OS_AIX
8989
8990	// basic tuning
8991
8992	if (atomic_available) {
8993	if (num_vars <= `2`) { // && ( team_size <= 8 ) due to false-sharing ???
8994	retval = atomic_reduce_block;
8995	}
8996	} // otherwise: use critical section
8997
8998	#elif KMP_OS_DARWIN
8999
9000	int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
9001	if (atomic_available && (num_vars <= `3`)) {
9002	retval = atomic_reduce_block;
9003	} else if (tree_available) {
9004	if ((reduce_size > (`9` * sizeof(kmp_real64))) &&
9005	(reduce_size < (`2000` * sizeof(kmp_real64)))) {
9006	retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
9007	}
9008	} // otherwise: use critical section
9009
9010	#else
9011	#error "Unknown or unsupported OS"
9012	#endif
9013
9014	#else
9015	#error "Unknown or unsupported architecture"
9016	#endif
9017	}
9018
9019	// KMP_FORCE_REDUCTION
9020
9021	// If the team is serialized (team_size == 1), ignore the forced reduction
9022	// method and stay with the unsynchronized method (empty_reduce_block)
9023	if (__kmp_force_reduction_method != reduction_method_not_defined &&
9024	team_size != `1`) {
9025
9026	PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
9027
9028	int atomic_available, tree_available;
9029
9030	switch ((forced_retval = __kmp_force_reduction_method)) {
9031	case critical_reduce_block:
9032	KMP_ASSERT(lck); // lck should be != 0
9033	break;
9034
9035	case atomic_reduce_block:
9036	atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
9037	if (!atomic_available) {
9038	KMP_WARNING(RedMethodNotSupported, "atomic");
9039	forced_retval = critical_reduce_block;
9040	}
9041	break;
9042
9043	case tree_reduce_block:
9044	tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
9045	if (!tree_available) {
9046	KMP_WARNING(RedMethodNotSupported, "tree");
9047	forced_retval = critical_reduce_block;
9048	} else {
9049	#if KMP_FAST_REDUCTION_BARRIER
9050	forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
9051	#endif
9052	}
9053	break;
9054
9055	default:
9056	KMP_ASSERT(`0`); // "unsupported method specified"
9057	}
9058
9059	retval = forced_retval;
9060	}
9061
9062	KA_TRACE(`10`, ("reduction method selected=%08x\n", retval));
9063
9064	#undef FAST_REDUCTION_TREE_METHOD_GENERATED
9065	#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
9066
9067	return (retval);
9068	}
9069	// this function is for testing set/get/determine reduce method
9070	kmp_int32 __kmp_get_reduce_method(void) {
9071	return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> `8`);
9072	}
9073
9074	// Soft pause sets up threads to ignore blocktime and just go to sleep.
9075	// Spin-wait code checks __kmp_pause_status and reacts accordingly.
9076	void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
9077
9078	// Hard pause shuts down the runtime completely. Resume happens naturally when
9079	// OpenMP is used subsequently.
9080	void __kmp_hard_pause() {
9081	__kmp_pause_status = kmp_hard_paused;
9082	__kmp_internal_end_thread(gtid_req: -`1`);
9083	}
9084
9085	// Soft resume sets __kmp_pause_status, and wakes up all threads.
9086	void __kmp_resume_if_soft_paused() {
9087	if (__kmp_pause_status == kmp_soft_paused) {
9088	__kmp_pause_status = kmp_not_paused;
9089
9090	for (int gtid = `1`; gtid < __kmp_threads_capacity; ++gtid) {
9091	kmp_info_t *thread = __kmp_threads[gtid];
9092	if (thread) { // Wake it if sleeping
9093	kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
9094	thread);
9095	if (fl.is_sleeping())
9096	fl.resume(th_gtid: gtid);
9097	else if (__kmp_try_suspend_mx(th: thread)) { // got suspend lock
9098	__kmp_unlock_suspend_mx(th: thread); // unlock it; it won't sleep
9099	} else { // thread holds the lock and may sleep soon
9100	do { // until either the thread sleeps, or we can get the lock
9101	if (fl.is_sleeping()) {
9102	fl.resume(th_gtid: gtid);
9103	break;
9104	} else if (__kmp_try_suspend_mx(th: thread)) {
9105	__kmp_unlock_suspend_mx(th: thread);
9106	break;
9107	}
9108	} while (`1`);
9109	}
9110	}
9111	}
9112	}
9113	}
9114
9115	// This function is called via __kmpc_pause_resource. Returns 0 if successful.
9116	// TODO: add warning messages
9117	int __kmp_pause_resource(kmp_pause_status_t level) {
9118	if (level == kmp_not_paused) { // requesting resume
9119	if (__kmp_pause_status == kmp_not_paused) {
9120	// error message about runtime not being paused, so can't resume
9121	return `1`;
9122	} else {
9123	KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused \|\|
9124	__kmp_pause_status == kmp_hard_paused);
9125	__kmp_pause_status = kmp_not_paused;
9126	return `0`;
9127	}
9128	} else if (level == kmp_soft_paused) { // requesting soft pause
9129	if (__kmp_pause_status != kmp_not_paused) {
9130	// error message about already being paused
9131	return `1`;
9132	} else {
9133	__kmp_soft_pause();
9134	return `0`;
9135	}
9136	} else if (level == kmp_hard_paused \|\| level == kmp_stop_tool_paused) {
9137	// requesting hard pause or stop_tool pause
9138	if (__kmp_pause_status != kmp_not_paused) {
9139	// error message about already being paused
9140	return `1`;
9141	} else {
9142	__kmp_hard_pause();
9143	return `0`;
9144	}
9145	} else {
9146	// error message about invalid level
9147	return `1`;
9148	}
9149	}
9150
9151	void __kmp_omp_display_env(int verbose) {
9152	__kmp_acquire_bootstrap_lock(lck: &__kmp_initz_lock);
9153	if (__kmp_init_serial == `0`)
9154	__kmp_do_serial_initialize();
9155	__kmp_display_env_impl(display_env: !verbose, display_env_verbose: verbose);
9156	__kmp_release_bootstrap_lock(lck: &__kmp_initz_lock);
9157	}
9158
9159	// The team size is changing, so distributed barrier must be modified
9160	void __kmp_resize_dist_barrier(kmp_team_t team, int* old_nthreads,
9161	int new_nthreads) {
9162	KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9163	bp_dist_bar);
9164	kmp_info_t **other_threads = team->t.t_threads;
9165
9166	// We want all the workers to stop waiting on the barrier while we adjust the
9167	// size of the team.
9168	for (int f = `1`; f < old_nthreads; ++f) {
9169	KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9170	// Ignore threads that are already inactive or not present in the team
9171	if (team->t.t_threads[f]->th.th_used_in_team.load() == `0`) {
9172	// teams construct causes thread_limit to get passed in, and some of
9173	// those could be inactive; just ignore them
9174	continue;
9175	}
9176	// If thread is transitioning still to in_use state, wait for it
9177	if (team->t.t_threads[f]->th.th_used_in_team.load() == `3`) {
9178	while (team->t.t_threads[f]->th.th_used_in_team.load() == `3`)
9179	KMP_CPU_PAUSE();
9180	}
9181	// The thread should be in_use now
9182	KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == `1`);
9183	// Transition to unused state
9184	team->t.t_threads[f]->th.th_used_in_team.store(i: `2`);
9185	KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == `2`);
9186	}
9187	// Release all the workers
9188	team->t.b->go_release();
9189
9190	KMP_MFENCE();
9191
9192	// Workers should see transition status 2 and move to 0; but may need to be
9193	// woken up first
9194	int count = old_nthreads - `1`;
9195	while (count > `0`) {
9196	count = old_nthreads - `1`;
9197	for (int f = `1`; f < old_nthreads; ++f) {
9198	if (other_threads[f]->th.th_used_in_team.load() != `0`) {
9199	if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
9200	kmp_atomic_flag_64<> flag = (kmp_atomic_flag_64<> )CCAST(
9201	void *, other_threads[f]->th.th_sleep_loc);
9202	__kmp_atomic_resume_64(target_gtid: other_threads[f]->th.th_info.ds.ds_gtid, flag);
9203	}
9204	} else {
9205	KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == `0`);
9206	count--;
9207	}
9208	}
9209	}
9210	// Now update the barrier size
9211	team->t.b->update_num_threads(nthr: new_nthreads);
9212	team->t.b->go_reset();
9213	}
9214
9215	void __kmp_add_threads_to_team(kmp_team_t team, int* new_nthreads) {
9216	// Add the threads back to the team
9217	KMP_DEBUG_ASSERT(team);
9218	// Threads were paused and pointed at th_used_in_team temporarily during a
9219	// resize of the team. We're going to set th_used_in_team to 3 to indicate to
9220	// the thread that it should transition itself back into the team. Then, if
9221	// blocktime isn't infinite, the thread could be sleeping, so we send a resume
9222	// to wake it up.
9223	for (int f = `1`; f < new_nthreads; ++f) {
9224	KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9225	(void)KMP_COMPARE_AND_STORE_ACQ32(
9226	&(team->t.t_threads[f]->th.th_used_in_team), `0`, `3`);
9227	if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
9228	__kmp_resume_32(target_gtid: team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9229	flag: (kmp_flag_32<false, false> *)NULL);
9230	}
9231	}
9232	// The threads should be transitioning to the team; when they are done, they
9233	// should have set th_used_in_team to 1. This loop forces master to wait until
9234	// all threads have moved into the team and are waiting in the barrier.
9235	int count = new_nthreads - `1`;
9236	while (count > `0`) {
9237	count = new_nthreads - `1`;
9238	for (int f = `1`; f < new_nthreads; ++f) {
9239	if (team->t.t_threads[f]->th.th_used_in_team.load() == `1`) {
9240	count--;
9241	}
9242	}
9243	}
9244	}
9245
9246	// Globals and functions for hidden helper task
9247	kmp_info_t **__kmp_hidden_helper_threads;
9248	kmp_info_t *__kmp_hidden_helper_main_thread;
9249	std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9250	#if KMP_OS_LINUX
9251	kmp_int32 __kmp_hidden_helper_threads_num = `8`;
9252	kmp_int32 __kmp_enable_hidden_helper = TRUE;
9253	#else
9254	kmp_int32 __kmp_hidden_helper_threads_num = `0`;
9255	kmp_int32 __kmp_enable_hidden_helper = FALSE;
9256	#endif
9257
9258	namespace {
9259	std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9260
9261	void __kmp_hidden_helper_wrapper_fn(int gtid, int* *, ...) {
9262	// This is an explicit synchronization on all hidden helper threads in case
9263	// that when a regular thread pushes a hidden helper task to one hidden
9264	// helper thread, the thread has not been awaken once since they're released
9265	// by the main thread after creating the team.
9266	KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9267	while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9268	__kmp_hidden_helper_threads_num)
9269	;
9270
9271	// If main thread, then wait for signal
9272	if (__kmpc_master(nullptr, global_tid: *gtid)) {
9273	// First, unset the initial state and release the initial thread
9274	TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9275	__kmp_hidden_helper_initz_release();
9276	__kmp_hidden_helper_main_thread_wait();
9277	// Now wake up all worker threads
9278	for (int i = `1`; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9279	__kmp_hidden_helper_worker_thread_signal();
9280	}
9281	}
9282	}
9283	} // namespace
9284
9285	void __kmp_hidden_helper_threads_initz_routine() {
9286	// Create a new root for hidden helper team/threads
9287	const int gtid = __kmp_register_root(TRUE);
9288	__kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9289	__kmp_hidden_helper_threads = &__kmp_threads[gtid];
9290	__kmp_hidden_helper_main_thread->th.th_set_nproc =
9291	__kmp_hidden_helper_threads_num;
9292
9293	KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, `0`);
9294
9295	__kmpc_fork_call(nullptr, nargs: `0`, microtask: __kmp_hidden_helper_wrapper_fn);
9296
9297	// Set the initialization flag to FALSE
9298	TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9299
9300	__kmp_hidden_helper_threads_deinitz_release();
9301	}
9302
9303	/ Nesting Mode:*
9304	Set via KMP_NESTING_MODE, which takes an integer.
9305	Note: we skip duplicate topology levels, and skip levels with only
9306	one entity.
9307	KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
9308	KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
9309	in the topology, and initializes the number of threads at each of those
9310	levels to the number of entities at each level, respectively, below the
9311	entity at the parent level.
9312	KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
9313	but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
9314	the user to turn nesting on explicitly. This is an even more experimental
9315	option to this experimental feature, and may change or go away in the
9316	future.
9317	*/
9318
9319	// Allocate space to store nesting levels
9320	void __kmp_init_nesting_mode() {
9321	int levels = KMP_HW_LAST;
9322	__kmp_nesting_mode_nlevels = levels;
9323	__kmp_nesting_nth_level = (int )KMP_INTERNAL_MALLOC(levels sizeof(int));
9324	for (int i = `0`; i < levels; ++i)
9325	__kmp_nesting_nth_level[i] = `0`;
9326	if (__kmp_nested_nth.size < levels) {
9327	__kmp_nested_nth.nth =
9328	(int )KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels sizeof(int));
9329	__kmp_nested_nth.size = levels;
9330	}
9331	}
9332
9333	// Set # threads for top levels of nesting; must be called after topology set
9334	void __kmp_set_nesting_mode_threads() {
9335	kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9336
9337	if (__kmp_nesting_mode == `1`)
9338	__kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9339	else if (__kmp_nesting_mode > `1`)
9340	__kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9341
9342	if (__kmp_topology) { // use topology info
9343	int loc, hw_level;
9344	for (loc = `0`, hw_level = `0`; hw_level < __kmp_topology->get_depth() &&
9345	loc < __kmp_nesting_mode_nlevels;
9346	loc++, hw_level++) {
9347	__kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(level: hw_level);
9348	if (__kmp_nesting_nth_level[loc] == `1`)
9349	loc--;
9350	}
9351	// Make sure all cores are used
9352	if (__kmp_nesting_mode > `1` && loc > `1`) {
9353	int core_level = __kmp_topology->get_level(type: KMP_HW_CORE);
9354	int num_cores = __kmp_topology->get_count(level: core_level);
9355	int upper_levels = `1`;
9356	for (int level = `0`; level < loc - `1`; ++level)
9357	upper_levels *= __kmp_nesting_nth_level[level];
9358	if (upper_levels * __kmp_nesting_nth_level[loc - `1`] < num_cores)
9359	__kmp_nesting_nth_level[loc - `1`] =
9360	num_cores / __kmp_nesting_nth_level[loc - `2`];
9361	}
9362	__kmp_nesting_mode_nlevels = loc;
9363	__kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9364	} else { // no topology info available; provide a reasonable guesstimation
9365	if (__kmp_avail_proc >= `4`) {
9366	__kmp_nesting_nth_level[`0`] = __kmp_avail_proc / `2`;
9367	__kmp_nesting_nth_level[`1`] = `2`;
9368	__kmp_nesting_mode_nlevels = `2`;
9369	} else {
9370	__kmp_nesting_nth_level[`0`] = __kmp_avail_proc;
9371	__kmp_nesting_mode_nlevels = `1`;
9372	}
9373	__kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9374	}
9375	for (int i = `0`; i < __kmp_nesting_mode_nlevels; ++i) {
9376	__kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9377	}
9378	set__nproc(thread, __kmp_nesting_nth_level[`0`]);
9379	if (__kmp_nesting_mode > `1` && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9380	__kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9381	if (get__max_active_levels(thread) > `1`) {
9382	// if max levels was set, set nesting mode levels to same
9383	__kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9384	}
9385	if (__kmp_nesting_mode == `1`) // turn on nesting for this case only
9386	set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9387	}
9388
9389	// Empty symbols to export (see exports_so.txt) when feature is disabled
9390	extern "C" {
9391	#if !KMP_STATS_ENABLED
9392	void __kmp_reset_stats() {}
9393	#endif
9394	#if !USE_DEBUGGER
9395	int __kmp_omp_debug_struct_info = FALSE;
9396	int __kmp_debugging = FALSE;
9397	#endif
9398	#if !USE_ITT_BUILD \|\| !USE_ITT_NOTIFY
9399	void __kmp_itt_fini_ittlib() {}
9400	void __kmp_itt_init_ittlib() {}
9401	#endif
9402	}
9403
9404	// end of file
9405

source code of openmp/runtime/src/kmp_runtime.cpp