1 | /* |
2 | * kmp_global.cpp -- KPTS global variables for runtime support library |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "kmp.h" |
14 | #include "kmp_affinity.h" |
15 | #if KMP_USE_HIER_SCHED |
16 | #include "kmp_dispatch_hier.h" |
17 | #endif |
18 | |
19 | kmp_key_t __kmp_gtid_threadprivate_key; |
20 | |
21 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
22 | kmp_cpuinfo_t __kmp_cpuinfo = {.initialized: 0}; // Not initialized |
23 | #endif |
24 | |
25 | #if KMP_STATS_ENABLED |
26 | #include "kmp_stats.h" |
27 | // lock for modifying the global __kmp_stats_list |
28 | kmp_tas_lock_t __kmp_stats_lock; |
29 | |
30 | // global list of per thread stats, the head is a sentinel node which |
31 | // accumulates all stats produced before __kmp_create_worker is called. |
32 | kmp_stats_list *__kmp_stats_list; |
33 | |
34 | // thread local pointer to stats node within list |
35 | KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr = NULL; |
36 | |
37 | // gives reference tick for all events (considered the 0 tick) |
38 | tsc_tick_count __kmp_stats_start_time; |
39 | #endif |
40 | |
41 | /* ----------------------------------------------------- */ |
42 | /* INITIALIZATION VARIABLES */ |
/* they are synchronized to write during init, but read anytime */
44 | volatile int __kmp_init_serial = FALSE; |
45 | volatile int __kmp_init_gtid = FALSE; |
46 | volatile int __kmp_init_common = FALSE; |
47 | volatile int __kmp_need_register_serial = TRUE; |
48 | volatile int __kmp_init_middle = FALSE; |
49 | volatile int __kmp_init_parallel = FALSE; |
50 | volatile int __kmp_init_hidden_helper = FALSE; |
51 | volatile int __kmp_init_hidden_helper_threads = FALSE; |
52 | volatile int __kmp_hidden_helper_team_done = FALSE; |
53 | #if KMP_USE_MONITOR |
54 | volatile int __kmp_init_monitor = |
55 | 0; /* 1 - launched, 2 - actually started (Windows* OS only) */ |
56 | #endif |
57 | volatile int __kmp_init_user_locks = FALSE; |
58 | |
59 | /* list of address of allocated caches for commons */ |
60 | kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL; |
61 | |
62 | int __kmp_init_counter = 0; |
63 | int __kmp_root_counter = 0; |
64 | int __kmp_version = 0; |
65 | |
66 | std::atomic<kmp_int32> __kmp_team_counter = 0; |
67 | std::atomic<kmp_int32> __kmp_task_counter = 0; |
68 | |
69 | size_t __kmp_stksize = KMP_DEFAULT_STKSIZE; |
70 | #if KMP_USE_MONITOR |
71 | size_t __kmp_monitor_stksize = 0; // auto adjust |
72 | #endif |
73 | size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET; |
74 | int __kmp_stkpadding = KMP_MIN_STKPADDING; |
75 | |
76 | size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR; |
77 | |
78 | // Barrier method defaults, settings, and strings. |
79 | // branch factor = 2^branch_bits (only relevant for tree & hyper barrier types) |
80 | kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; |
81 | /* branch_factor = 4 */ /* hyper2: C78980 */ |
82 | kmp_uint32 __kmp_barrier_release_bb_dflt = 2; |
83 | /* branch_factor = 4 */ /* hyper2: C78980 */ |
84 | |
85 | kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; |
86 | /* hyper2: C78980 */ |
87 | kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_hyper_bar; |
88 | /* hyper2: C78980 */ |
89 | |
90 | kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier] = {0}; |
91 | kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier] = {0}; |
92 | kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier] = {bp_linear_bar}; |
93 | kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier] = {bp_linear_bar}; |
94 | char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier] = { |
95 | "KMP_PLAIN_BARRIER" , "KMP_FORKJOIN_BARRIER" |
96 | #if KMP_FAST_REDUCTION_BARRIER |
97 | , |
98 | "KMP_REDUCTION_BARRIER" |
99 | #endif // KMP_FAST_REDUCTION_BARRIER |
100 | }; |
101 | char const *__kmp_barrier_pattern_env_name[bs_last_barrier] = { |
102 | "KMP_PLAIN_BARRIER_PATTERN" , "KMP_FORKJOIN_BARRIER_PATTERN" |
103 | #if KMP_FAST_REDUCTION_BARRIER |
104 | , |
105 | "KMP_REDUCTION_BARRIER_PATTERN" |
106 | #endif // KMP_FAST_REDUCTION_BARRIER |
107 | }; |
108 | char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain" , "forkjoin" |
109 | #if KMP_FAST_REDUCTION_BARRIER |
110 | , |
111 | "reduction" |
112 | #endif // KMP_FAST_REDUCTION_BARRIER |
113 | }; |
114 | char const *__kmp_barrier_pattern_name[bp_last_bar] = { |
115 | "linear" , "tree" , "hyper" , "hierarchical" , "dist" }; |
116 | |
117 | int __kmp_allThreadsSpecified = 0; |
118 | size_t __kmp_align_alloc = CACHE_LINE; |
119 | |
120 | int __kmp_generate_warnings = kmp_warnings_low; |
121 | int __kmp_reserve_warn = 0; |
122 | int __kmp_xproc = 0; |
123 | int __kmp_avail_proc = 0; |
124 | size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE; |
125 | int __kmp_sys_max_nth = KMP_MAX_NTH; |
126 | int __kmp_max_nth = 0; |
127 | int __kmp_cg_max_nth = 0; |
128 | int __kmp_task_max_nth = 0; |
129 | int __kmp_teams_max_nth = 0; |
130 | int __kmp_threads_capacity = 0; |
131 | int __kmp_dflt_team_nth = 0; |
132 | int __kmp_dflt_team_nth_ub = 0; |
133 | int __kmp_tp_capacity = 0; |
134 | int __kmp_tp_cached = 0; |
135 | int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF; |
136 | int __kmp_dflt_max_active_levels = 1; // Nesting off by default |
137 | bool __kmp_dflt_max_active_levels_set = false; // Don't override set value |
138 | #if KMP_NESTED_HOT_TEAMS |
139 | int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ |
140 | /* 1 - keep extra threads when reduced */ |
141 | int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */ |
142 | #endif |
143 | enum library_type __kmp_library = library_none; |
144 | enum sched_type __kmp_sched = |
145 | kmp_sch_default; /* scheduling method for runtime scheduling */ |
146 | enum sched_type __kmp_static = |
147 | kmp_sch_static_greedy; /* default static scheduling method */ |
148 | enum sched_type __kmp_guided = |
149 | kmp_sch_guided_iterative_chunked; /* default guided scheduling method */ |
150 | enum sched_type __kmp_auto = |
151 | kmp_sch_guided_analytical_chunked; /* default auto scheduling method */ |
152 | #if KMP_USE_HIER_SCHED |
153 | int __kmp_dispatch_hand_threading = 0; |
154 | int __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LAST + 1]; |
155 | int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1]; |
156 | kmp_hier_sched_env_t __kmp_hier_scheds = {0, 0, NULL, NULL, NULL}; |
157 | #endif |
158 | int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; // in microseconds |
159 | char __kmp_blocktime_units = 'm'; // Units specified in KMP_BLOCKTIME |
160 | bool __kmp_wpolicy_passive = false; |
161 | #if KMP_USE_MONITOR |
162 | int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS; |
163 | int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(KMP_DEFAULT_BLOCKTIME, |
164 | KMP_MIN_MONITOR_WAKEUPS); |
165 | #endif |
166 | #ifdef KMP_ADJUST_BLOCKTIME |
167 | int __kmp_zero_bt = FALSE; |
168 | #endif /* KMP_ADJUST_BLOCKTIME */ |
169 | #ifdef KMP_DFLT_NTH_CORES |
170 | int __kmp_ncores = 0; |
171 | #endif |
172 | int __kmp_chunk = 0; |
173 | int __kmp_force_monotonic = 0; |
174 | int __kmp_abort_delay = 0; |
/* Platform-dependent defaults for __kmp_gtid_mode and __kmp_adjust_gtid_mode;
   the meaning of each mode value is given on the line that sets it. */
#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_TDATA_GTID)
int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */
int __kmp_adjust_gtid_mode = FALSE;
#elif KMP_OS_WINDOWS
int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */
int __kmp_adjust_gtid_mode = FALSE;
#else
int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */
int __kmp_adjust_gtid_mode = TRUE;
#endif /* (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_TDATA_GTID) */
185 | #ifdef KMP_TDATA_GTID |
186 | KMP_THREAD_LOCAL int __kmp_gtid = KMP_GTID_DNE; |
187 | #endif /* KMP_TDATA_GTID */ |
188 | int __kmp_tls_gtid_min = INT_MAX; |
189 | int __kmp_foreign_tp = TRUE; |
190 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
191 | int __kmp_inherit_fp_control = TRUE; |
192 | kmp_int16 __kmp_init_x87_fpu_control_word = 0; |
193 | kmp_uint32 __kmp_init_mxcsr = 0; |
194 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
195 | |
196 | #ifdef USE_LOAD_BALANCE |
197 | double __kmp_load_balance_interval = 1.0; |
198 | #endif /* USE_LOAD_BALANCE */ |
199 | |
200 | kmp_nested_nthreads_t __kmp_nested_nth = {NULL, .size: 0, .used: 0}; |
201 | |
202 | #if KMP_USE_ADAPTIVE_LOCKS |
203 | |
204 | kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { |
205 | .max_soft_retries: 1, .max_badness: 1024}; // TODO: tune it! |
206 | |
207 | #if KMP_DEBUG_ADAPTIVE_LOCKS |
208 | const char *__kmp_speculative_statsfile = "-" ; |
209 | #endif |
210 | |
211 | #endif // KMP_USE_ADAPTIVE_LOCKS |
212 | |
213 | int __kmp_display_env = FALSE; |
214 | int __kmp_display_env_verbose = FALSE; |
215 | int __kmp_omp_cancellation = FALSE; |
216 | int __kmp_nteams = 0; |
217 | int __kmp_teams_thread_limit = 0; |
218 | |
219 | #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT |
220 | int __kmp_user_level_mwait = FALSE; |
221 | int __kmp_umwait_enabled = FALSE; |
222 | int __kmp_mwait_enabled = FALSE; |
223 | int __kmp_mwait_hints = 0; |
224 | #endif |
225 | |
226 | #if KMP_HAVE_UMWAIT |
227 | int __kmp_waitpkg_enabled = 0; |
228 | int __kmp_tpause_state = 0; |
229 | int __kmp_tpause_hint = 1; |
230 | int __kmp_tpause_enabled = 0; |
231 | #endif |
232 | |
233 | /* map OMP 3.0 schedule types with our internal schedule types */ |
234 | enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext + |
235 | kmp_sched_upper_std - kmp_sched_lower - 2] = { |
236 | kmp_sch_static_chunked, // ==> kmp_sched_static = 1 |
237 | kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2 |
238 | kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3 |
239 | kmp_sch_auto, // ==> kmp_sched_auto = 4 |
240 | kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101 |
241 | // will likely not be used, introduced here just to debug the code |
242 | // of public intel extension schedules |
243 | }; |
244 | |
245 | #if KMP_OS_LINUX |
246 | enum clock_function_type __kmp_clock_function; |
247 | int __kmp_clock_function_param; |
248 | #endif /* KMP_OS_LINUX */ |
249 | |
250 | #if KMP_MIC_SUPPORTED |
251 | enum mic_type __kmp_mic_type = non_mic; |
252 | #endif |
253 | |
254 | #if KMP_AFFINITY_SUPPORTED |
255 | |
256 | KMPAffinity *__kmp_affinity_dispatch = NULL; |
257 | |
258 | #if KMP_USE_HWLOC |
259 | int __kmp_hwloc_error = FALSE; |
260 | hwloc_topology_t __kmp_hwloc_topology = NULL; |
261 | #endif |
262 | |
263 | #if KMP_OS_WINDOWS |
264 | #if KMP_GROUP_AFFINITY |
265 | int __kmp_num_proc_groups = 1; |
266 | #endif /* KMP_GROUP_AFFINITY */ |
267 | kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL; |
268 | kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL; |
269 | kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL; |
270 | kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL; |
271 | #endif /* KMP_OS_WINDOWS */ |
272 | |
273 | size_t __kmp_affin_mask_size = 0; |
274 | enum affinity_top_method __kmp_affinity_top_method = |
275 | affinity_top_method_default; |
276 | |
277 | // Regular thread affinity settings from KMP_AFFINITY |
278 | kmp_affinity_t __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY" ); |
279 | // Hidden helper thread affinity settings from KMP_HIDDEN_HELPER_AFFINITY |
280 | kmp_affinity_t __kmp_hh_affinity = |
281 | KMP_AFFINITY_INIT("KMP_HIDDEN_HELPER_AFFINITY" ); |
282 | kmp_affinity_t *__kmp_affinities[] = {&__kmp_affinity, &__kmp_hh_affinity}; |
283 | |
284 | char *__kmp_cpuinfo_file = NULL; |
285 | #if KMP_WEIGHTED_ITERATIONS_SUPPORTED |
286 | int __kmp_first_osid_with_ecore = -1; |
287 | #endif |
288 | |
289 | #endif /* KMP_AFFINITY_SUPPORTED */ |
290 | |
291 | kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, .size: 0, .used: 0}; |
292 | kmp_proc_bind_t __kmp_teams_proc_bind = proc_bind_spread; |
293 | int __kmp_affinity_num_places = 0; |
294 | int __kmp_display_affinity = FALSE; |
295 | char *__kmp_affinity_format = NULL; |
296 | |
297 | kmp_int32 __kmp_default_device = 0; |
298 | |
299 | kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams; |
300 | kmp_int32 __kmp_max_task_priority = 0; |
301 | kmp_uint64 __kmp_taskloop_min_tasks = 0; |
302 | |
303 | int __kmp_memkind_available = 0; |
304 | omp_allocator_handle_t const omp_null_allocator = NULL; |
305 | omp_allocator_handle_t const omp_default_mem_alloc = |
306 | (omp_allocator_handle_t const)1; |
307 | omp_allocator_handle_t const omp_large_cap_mem_alloc = |
308 | (omp_allocator_handle_t const)2; |
309 | omp_allocator_handle_t const omp_const_mem_alloc = |
310 | (omp_allocator_handle_t const)3; |
311 | omp_allocator_handle_t const omp_high_bw_mem_alloc = |
312 | (omp_allocator_handle_t const)4; |
313 | omp_allocator_handle_t const omp_low_lat_mem_alloc = |
314 | (omp_allocator_handle_t const)5; |
315 | omp_allocator_handle_t const omp_cgroup_mem_alloc = |
316 | (omp_allocator_handle_t const)6; |
317 | omp_allocator_handle_t const omp_pteam_mem_alloc = |
318 | (omp_allocator_handle_t const)7; |
319 | omp_allocator_handle_t const omp_thread_mem_alloc = |
320 | (omp_allocator_handle_t const)8; |
321 | omp_allocator_handle_t const llvm_omp_target_host_mem_alloc = |
322 | (omp_allocator_handle_t const)100; |
323 | omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc = |
324 | (omp_allocator_handle_t const)101; |
325 | omp_allocator_handle_t const llvm_omp_target_device_mem_alloc = |
326 | (omp_allocator_handle_t const)102; |
327 | omp_allocator_handle_t const kmp_max_mem_alloc = |
328 | (omp_allocator_handle_t const)1024; |
329 | omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc; |
330 | |
331 | omp_memspace_handle_t const omp_default_mem_space = |
332 | (omp_memspace_handle_t const)0; |
333 | omp_memspace_handle_t const omp_large_cap_mem_space = |
334 | (omp_memspace_handle_t const)1; |
335 | omp_memspace_handle_t const omp_const_mem_space = |
336 | (omp_memspace_handle_t const)2; |
337 | omp_memspace_handle_t const omp_high_bw_mem_space = |
338 | (omp_memspace_handle_t const)3; |
339 | omp_memspace_handle_t const omp_low_lat_mem_space = |
340 | (omp_memspace_handle_t const)4; |
341 | omp_memspace_handle_t const llvm_omp_target_host_mem_space = |
342 | (omp_memspace_handle_t const)100; |
343 | omp_memspace_handle_t const llvm_omp_target_shared_mem_space = |
344 | (omp_memspace_handle_t const)101; |
345 | omp_memspace_handle_t const llvm_omp_target_device_mem_space = |
346 | (omp_memspace_handle_t const)102; |
347 | |
348 | /* This check ensures that the compiler is passing the correct data type for the |
349 | flags formal parameter of the function kmpc_omp_task_alloc(). If the type is |
350 | not a 4-byte type, then give an error message about a non-positive length |
351 | array pointing here. If that happens, the kmp_tasking_flags_t structure must |
352 | be redefined to have exactly 32 bits. */ |
353 | KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4); |
354 | |
355 | int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */ |
356 | int __kmp_enable_task_throttling = 1; |
357 | |
358 | #ifdef DEBUG_SUSPEND |
359 | int __kmp_suspend_count = 0; |
360 | #endif |
361 | |
362 | int __kmp_settings = FALSE; |
363 | int __kmp_duplicate_library_ok = 0; |
364 | #if USE_ITT_BUILD |
365 | int __kmp_forkjoin_frames = 1; |
366 | int __kmp_forkjoin_frames_mode = 3; |
367 | #endif |
368 | PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = |
369 | reduction_method_not_defined; |
370 | int __kmp_determ_red = FALSE; |
371 | |
372 | #ifdef KMP_DEBUG |
373 | int kmp_a_debug = 0; |
374 | int kmp_b_debug = 0; |
375 | int kmp_c_debug = 0; |
376 | int kmp_d_debug = 0; |
377 | int kmp_e_debug = 0; |
378 | int kmp_f_debug = 0; |
379 | int kmp_diag = 0; |
380 | #endif |
381 | |
382 | /* For debug information logging using rotating buffer */ |
383 | int __kmp_debug_buf = |
384 | FALSE; /* TRUE means use buffer, FALSE means print to stderr */ |
385 | int __kmp_debug_buf_lines = |
386 | KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */ |
387 | int __kmp_debug_buf_chars = |
388 | KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */ |
389 | int __kmp_debug_buf_atomic = |
390 | FALSE; /* TRUE means use atomic update of buffer entry pointer */ |
391 | |
392 | char *__kmp_debug_buffer = NULL; /* Debug buffer itself */ |
393 | std::atomic<int> __kmp_debug_count = |
394 | 0; /* number of lines printed in buffer so far */ |
395 | int __kmp_debug_buf_warn_chars = |
396 | 0; /* Keep track of char increase recommended in warnings */ |
397 | /* end rotating debug buffer */ |
398 | |
399 | #ifdef KMP_DEBUG |
400 | int __kmp_par_range; /* +1 => only go par for constructs in range */ |
401 | /* -1 => only go par for constructs outside range */ |
402 | char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = {'\0'}; |
403 | char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = {'\0'}; |
404 | int __kmp_par_range_lb = 0; |
405 | int __kmp_par_range_ub = INT_MAX; |
406 | #endif /* KMP_DEBUG */ |
407 | |
408 | /* For printing out dynamic storage map for threads and teams */ |
409 | int __kmp_storage_map = |
410 | FALSE; /* True means print storage map for threads and teams */ |
411 | int __kmp_storage_map_verbose = |
412 | FALSE; /* True means storage map includes placement info */ |
413 | int __kmp_storage_map_verbose_specified = FALSE; |
414 | /* Initialize the library data structures when we fork a child process, defaults |
415 | * to TRUE */ |
416 | int __kmp_need_register_atfork = |
417 | TRUE; /* At initialization, call pthread_atfork to install fork handler */ |
418 | int __kmp_need_register_atfork_specified = TRUE; |
419 | |
420 | int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */ |
421 | int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */ |
422 | int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */ |
423 | int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */ |
424 | |
425 | // From KMP_USE_YIELD: |
426 | // 0 = never yield; |
427 | // 1 = always yield (default); |
428 | // 2 = yield only if oversubscribed |
429 | #if KMP_OS_DARWIN && KMP_ARCH_AARCH64 |
430 | // Set to 0 for environments where yield is slower |
431 | kmp_int32 __kmp_use_yield = 0; |
432 | #else |
433 | kmp_int32 __kmp_use_yield = 1; |
434 | #endif |
435 | |
436 | // This will be 1 if KMP_USE_YIELD environment variable was set explicitly |
437 | kmp_int32 __kmp_use_yield_exp_set = 0; |
438 | |
439 | kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT; |
440 | kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT; |
441 | kmp_uint64 __kmp_pause_init = 1; // for tpause |
442 | |
443 | /* ------------------------------------------------------ */ |
/* STATE mostly synchronized with global lock */
445 | /* data written to rarely by primary threads, read often by workers */ |
446 | /* TODO: None of this global padding stuff works consistently because the order |
447 | of declaration is not necessarily correlated to storage order. To fix this, |
448 | all the important globals must be put in a big structure instead. */ |
449 | KMP_ALIGN_CACHE |
450 | kmp_info_t **__kmp_threads = NULL; |
451 | kmp_root_t **__kmp_root = NULL; |
452 | kmp_old_threads_list_t *__kmp_old_threads_list = NULL; |
453 | |
454 | /* data read/written to often by primary threads */ |
455 | KMP_ALIGN_CACHE |
456 | volatile int __kmp_nth = 0; |
457 | volatile int __kmp_all_nth = 0; |
458 | volatile kmp_info_t *__kmp_thread_pool = NULL; |
459 | volatile kmp_team_t *__kmp_team_pool = NULL; |
460 | |
461 | KMP_ALIGN_CACHE |
462 | std::atomic<int> __kmp_thread_pool_active_nth = 0; |
463 | |
464 | /* ------------------------------------------------- |
465 | * GLOBAL/ROOT STATE */ |
466 | KMP_ALIGN_CACHE |
467 | kmp_global_t __kmp_global; |
468 | |
469 | /* ----------------------------------------------- */ |
470 | /* GLOBAL SYNCHRONIZATION LOCKS */ |
471 | /* TODO verify the need for these locks and if they need to be global */ |
472 | |
473 | #if KMP_USE_INTERNODE_ALIGNMENT |
474 | /* Multinode systems have larger cache line granularity which can cause |
475 | * false sharing if the alignment is not large enough for these locks */ |
476 | KMP_ALIGN_CACHE_INTERNODE |
477 | |
478 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_initz_lock); /* Control initializations */ |
479 | KMP_ALIGN_CACHE_INTERNODE |
480 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_forkjoin_lock); /* control fork/join access */ |
481 | KMP_ALIGN_CACHE_INTERNODE |
482 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_exit_lock); /* exit() is not always thread-safe */ |
483 | #if KMP_USE_MONITOR |
484 | /* control monitor thread creation */ |
485 | KMP_ALIGN_CACHE_INTERNODE |
486 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_monitor_lock); |
487 | #endif |
488 | /* used for the hack to allow threadprivate cache and __kmp_threads expansion |
489 | to co-exist */ |
490 | KMP_ALIGN_CACHE_INTERNODE |
491 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_tp_cached_lock); |
492 | |
493 | KMP_ALIGN_CACHE_INTERNODE |
494 | KMP_LOCK_INIT(__kmp_global_lock); /* Control OS/global access */ |
495 | KMP_ALIGN_CACHE_INTERNODE |
496 | kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ |
497 | KMP_ALIGN_CACHE_INTERNODE |
498 | KMP_LOCK_INIT(__kmp_debug_lock); /* Control I/O access for KMP_DEBUG */ |
499 | #else |
500 | KMP_ALIGN_CACHE |
501 | |
502 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_initz_lock); /* Control initializations */ |
503 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_forkjoin_lock); /* control fork/join access */ |
504 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_exit_lock); /* exit() is not always thread-safe */ |
505 | #if KMP_USE_MONITOR |
506 | /* control monitor thread creation */ |
507 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_monitor_lock); |
508 | #endif |
509 | /* used for the hack to allow threadprivate cache and __kmp_threads expansion |
510 | to co-exist */ |
511 | KMP_BOOTSTRAP_LOCK_INIT(__kmp_tp_cached_lock); |
512 | |
513 | KMP_ALIGN(128) |
514 | KMP_LOCK_INIT(__kmp_global_lock); /* Control OS/global access */ |
515 | KMP_ALIGN(128) |
516 | kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ |
517 | KMP_ALIGN(128) |
518 | KMP_LOCK_INIT(__kmp_debug_lock); /* Control I/O access for KMP_DEBUG */ |
519 | #endif |
520 | |
521 | /* ----------------------------------------------- */ |
522 | |
523 | #if KMP_HANDLE_SIGNALS |
/* Signal handling is disabled by default, because it confuses users: in case
   of sigsegv (or other trouble) in user code, the signal handler catches the
   signal, which then "appears" in the monitor thread (when the monitor
   executes the raise() function). Users see the signal in the monitor thread
   and blame OpenMP RTL.

   Grant said signal handling was required on some older OSes (Irix?) supported
   by KAI, because bad applications hung but did not abort. Currently it is not
   a problem for Linux* OS, OS X* and Windows* OS.

   Grant: Found new hangs for EL4, EL5, and a Fedora Core machine. So I'm
   putting the default back for now to see if that fixes hangs on those
   machines.

   2010-04-13 Lev: It was a bug in Fortran RTL. Fortran RTL prints a kind of
   stack backtrace when the program is aborting, but the code is not
   signal-safe. When multiple signals are raised at the same time (which occurs
   in dynamic negative tests because all the worker threads detect the same
   error), Fortran RTL may hang. The bug was finally fixed in the Fortran RTL
   library provided by Steve R., and will be available soon. */
543 | int __kmp_handle_signals = FALSE; |
544 | #endif |
545 | |
546 | #ifdef DEBUG_SUSPEND |
547 | int get_suspend_count_(void) { |
548 | int count = __kmp_suspend_count; |
549 | __kmp_suspend_count = 0; |
550 | return count; |
551 | } |
552 | void set_suspend_count_(int *value) { __kmp_suspend_count = *value; } |
553 | #endif |
554 | |
555 | kmp_target_offload_kind_t __kmp_target_offload = tgt_default; |
556 | |
557 | // OMP Pause Resources |
558 | kmp_pause_status_t __kmp_pause_status = kmp_not_paused; |
559 | |
560 | // Nesting mode |
561 | int __kmp_nesting_mode = 0; |
562 | int __kmp_nesting_mode_nlevels = 1; |
563 | int *__kmp_nesting_nth_level; |
564 | |
565 | #if OMPX_TASKGRAPH |
566 | // TDG record & replay |
567 | int __kmp_tdg_dot = 0; |
568 | kmp_int32 __kmp_max_tdgs = 100; |
569 | kmp_tdg_info_t **__kmp_global_tdgs = NULL; |
570 | kmp_int32 __kmp_curr_tdg_idx = |
571 | 0; // Id of the current TDG being recorded or executed |
572 | kmp_int32 __kmp_num_tdg = 0; |
kmp_int32 __kmp_successors_size = 10; // Initial successor list size for
                                      // recording
575 | std::atomic<kmp_int32> __kmp_tdg_task_id = 0; |
576 | #endif |
577 | // end of file // |
578 | |
579 | |