1 | /* |
2 | * kmp_csupport.cpp -- kfront linkage support for OpenMP. |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #define __KMP_IMP |
14 | #include "omp.h" /* extern "C" declarations of user-visible routines */ |
15 | #include "kmp.h" |
16 | #include "kmp_error.h" |
17 | #include "kmp_i18n.h" |
18 | #include "kmp_itt.h" |
19 | #include "kmp_lock.h" |
20 | #include "kmp_stats.h" |
21 | #include "kmp_utils.h" |
22 | #include "ompt-specific.h" |
23 | |
24 | #define MAX_MESSAGE 512 |
25 | |
26 | // flags will be used in future, e.g. to implement openmp_strict library |
27 | // restrictions |
28 | |
29 | /*! |
30 | * @ingroup STARTUP_SHUTDOWN |
31 | * @param loc in source location information |
32 | * @param flags in for future use (currently ignored) |
33 | * |
34 | * Initialize the runtime library. This call is optional; if it is not made then |
35 | * it will be implicitly called by attempts to use other library functions. |
36 | */ |
37 | void __kmpc_begin(ident_t *loc, kmp_int32 flags) { |
38 | // By default __kmpc_begin() is no-op. |
39 | char *env; |
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
42 | __kmp_middle_initialize(); |
43 | __kmp_assign_root_init_mask(); |
44 | KC_TRACE(10, ("__kmpc_begin: middle initialization called\n" )); |
45 | } else if (__kmp_ignore_mppbeg() == FALSE) { |
46 | // By default __kmp_ignore_mppbeg() returns TRUE. |
47 | __kmp_internal_begin(); |
48 | KC_TRACE(10, ("__kmpc_begin: called\n" )); |
49 | } |
50 | } |
51 | |
52 | /*! |
53 | * @ingroup STARTUP_SHUTDOWN |
54 | * @param loc source location information |
55 | * |
 * Shut down the runtime library. This call is also optional, and even if it is
 * made it will not do anything unless the `KMP_IGNORE_MPPEND` environment
 * variable is set to zero.
59 | */ |
60 | void __kmpc_end(ident_t *loc) { |
61 | // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() |
62 | // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND |
63 | // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() |
64 | // returns FALSE and __kmpc_end() will unregister this root (it can cause |
65 | // library shut down). |
66 | if (__kmp_ignore_mppend() == FALSE) { |
67 | KC_TRACE(10, ("__kmpc_end: called\n" )); |
68 | KA_TRACE(30, ("__kmpc_end\n" )); |
69 | |
    __kmp_internal_end_thread(-1);
71 | } |
72 | #if KMP_OS_WINDOWS && OMPT_SUPPORT |
73 | // Normal exit process on Windows does not allow worker threads of the final |
74 | // parallel region to finish reporting their events, so shutting down the |
75 | // library here fixes the issue at least for the cases where __kmpc_end() is |
76 | // placed properly. |
77 | if (ompt_enabled.enabled) |
78 | __kmp_internal_end_library(__kmp_gtid_get_specific()); |
79 | #endif |
80 | } |
81 | |
82 | /*! |
83 | @ingroup THREAD_STATES |
84 | @param loc Source location information. |
85 | @return The global thread index of the active thread. |
86 | |
87 | This function can be called in any context. |
88 | |
If the runtime has only been entered at the outermost level from a
90 | single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is |
91 | that which would be returned by omp_get_thread_num() in the outermost |
92 | active parallel construct. (Or zero if there is no active parallel |
93 | construct, since the primary thread is necessarily thread zero). |
94 | |
95 | If multiple non-OpenMP threads all enter an OpenMP construct then this |
96 | will be a unique thread identifier among all the threads created by |
97 | the OpenMP runtime (but the value cannot be defined in terms of |
98 | OpenMP thread ids returned by omp_get_thread_num()). |
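
For illustration only (this is a sketch, not a guarantee of how any particular
compiler emits code), the value returned here is what is normally passed as the
@c global_tid argument of other @c __kmpc_ entry points, with @c loc standing
for a compiler-generated ident_t describing the call site:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_barrier(&loc, gtid);
@endcode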
99 | */ |
100 | kmp_int32 __kmpc_global_thread_num(ident_t *loc) { |
101 | kmp_int32 gtid = __kmp_entry_gtid(); |
102 | |
103 | KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n" , gtid)); |
104 | |
105 | return gtid; |
106 | } |
107 | |
108 | /*! |
109 | @ingroup THREAD_STATES |
110 | @param loc Source location information. |
111 | @return The number of threads under control of the OpenMP<sup>*</sup> runtime |
112 | |
113 | This function can be called in any context. |
114 | It returns the total number of threads under the control of the OpenMP runtime. |
115 | That is not a number that can be determined by any OpenMP standard calls, since |
116 | the library may be called from more than one non-OpenMP thread, and this |
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
121 | */ |
122 | kmp_int32 __kmpc_global_num_threads(ident_t *loc) { |
123 | KC_TRACE(10, |
124 | ("__kmpc_global_num_threads: num_threads = %d\n" , __kmp_all_nth)); |
125 | |
126 | return TCR_4(__kmp_all_nth); |
127 | } |
128 | |
129 | /*! |
130 | @ingroup THREAD_STATES |
131 | @param loc Source location information. |
132 | @return The thread number of the calling thread in the innermost active parallel |
133 | construct. |
134 | */ |
135 | kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { |
136 | KC_TRACE(10, ("__kmpc_bound_thread_num: called\n" )); |
137 | return __kmp_tid_from_gtid(__kmp_entry_gtid()); |
138 | } |
139 | |
140 | /*! |
141 | @ingroup THREAD_STATES |
142 | @param loc Source location information. |
143 | @return The number of threads in the innermost active parallel construct. |
144 | */ |
145 | kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { |
146 | KC_TRACE(10, ("__kmpc_bound_num_threads: called\n" )); |
147 | |
148 | return __kmp_entry_thread()->th.th_team->t.t_nproc; |
149 | } |
150 | |
151 | /*! |
152 | * @ingroup DEPRECATED |
153 | * @param loc location description |
154 | * |
155 | * This function need not be called. It always returns TRUE. |
156 | */ |
157 | kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { |
158 | #ifndef KMP_DEBUG |
159 | |
160 | return TRUE; |
161 | |
162 | #else |
163 | |
164 | const char *semi2; |
165 | const char *semi3; |
166 | int line_no; |
167 | |
168 | if (__kmp_par_range == 0) { |
169 | return TRUE; |
170 | } |
171 | semi2 = loc->psource; |
172 | if (semi2 == NULL) { |
173 | return TRUE; |
174 | } |
  semi2 = strchr(semi2, ';');
176 | if (semi2 == NULL) { |
177 | return TRUE; |
178 | } |
  semi2 = strchr(semi2 + 1, ';');
180 | if (semi2 == NULL) { |
181 | return TRUE; |
182 | } |
183 | if (__kmp_par_range_filename[0]) { |
184 | const char *name = semi2 - 1; |
185 | while ((name > loc->psource) && (*name != '/') && (*name != ';')) { |
186 | name--; |
187 | } |
188 | if ((*name == '/') || (*name == ';')) { |
189 | name++; |
190 | } |
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 | return __kmp_par_range < 0; |
193 | } |
194 | } |
  semi3 = strchr(semi2 + 1, ';');
196 | if (__kmp_par_range_routine[0]) { |
197 | if ((semi3 != NULL) && (semi3 > semi2) && |
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 | return __kmp_par_range < 0; |
200 | } |
201 | } |
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 | if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { |
204 | return __kmp_par_range > 0; |
205 | } |
206 | return __kmp_par_range < 0; |
207 | } |
208 | return TRUE; |
209 | |
210 | #endif /* KMP_DEBUG */ |
211 | } |
212 | |
213 | /*! |
214 | @ingroup THREAD_STATES |
215 | @param loc Source location information. |
216 | @return 1 if this thread is executing inside an active parallel region, zero if |
217 | not. |
218 | */ |
219 | kmp_int32 __kmpc_in_parallel(ident_t *loc) { |
220 | return __kmp_entry_thread()->th.th_root->r.r_active; |
221 | } |
222 | |
223 | /*! |
224 | @ingroup PARALLEL |
225 | @param loc source location information |
226 | @param global_tid global thread number |
227 | @param num_threads number of threads requested for this parallel construct |
228 | |
229 | Set the number of threads to be used by the next fork spawned by this thread. |
230 | This call is only required if the parallel construct has a `num_threads` clause. |
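
As a rough, compiler-dependent sketch, a `num_threads(4)` clause is typically
lowered into a call to this function immediately before the fork, with @c loc
and @c outlined_fn standing for compiler-generated objects:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_threads(&loc, gtid, 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
@endcode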
231 | */ |
232 | void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, |
233 | kmp_int32 num_threads) { |
234 | KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n" , |
235 | global_tid, num_threads)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
238 | } |
239 | |
240 | void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { |
241 | KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n" )); |
242 | /* the num_threads are automatically popped */ |
243 | } |
244 | |
245 | void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, |
246 | kmp_int32 proc_bind) { |
247 | KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n" , global_tid, |
248 | proc_bind)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
251 | } |
252 | |
253 | /*! |
254 | @ingroup PARALLEL |
255 | @param loc source location information |
256 | @param argc total number of arguments in the ellipsis |
257 | @param microtask pointer to callback routine consisting of outlined parallel |
258 | construct |
259 | @param ... pointers to shared variables that aren't global |
260 | |
261 | Do the actual fork and call the microtask in the relevant number of threads. |
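
As an illustrative sketch (the exact lowering is compiler dependent), the
outlined routine follows the @c kmpc_micro signature, and the shared-variable
pointers passed through the ellipsis reappear as its trailing arguments. Here
@c outlined and @c loc stand for compiler-generated objects:
@code
void outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *shared_x) {
  // body of the parallel region, operating on *shared_x
}

int x = 0;
__kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &x);
@endcode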
262 | */ |
263 | void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { |
264 | int gtid = __kmp_entry_gtid(); |
265 | |
266 | #if (KMP_STATS_ENABLED) |
267 | // If we were in a serial region, then stop the serial timer, record |
268 | // the event, and start parallel region timer |
269 | stats_state_e previous_state = KMP_GET_THREAD_STATE(); |
270 | if (previous_state == stats_state_e::SERIAL_REGION) { |
271 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); |
272 | } else { |
273 | KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); |
274 | } |
275 | int inParallel = __kmpc_in_parallel(loc); |
276 | if (inParallel) { |
277 | KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); |
278 | } else { |
279 | KMP_COUNT_BLOCK(OMP_PARALLEL); |
280 | } |
281 | #endif |
282 | |
  // maybe saving thr_state is enough here
284 | { |
285 | va_list ap; |
286 | va_start(ap, microtask); |
287 | |
288 | #if OMPT_SUPPORT |
289 | ompt_frame_t *ompt_frame; |
290 | if (ompt_enabled.enabled) { |
291 | kmp_info_t *master_th = __kmp_threads[gtid]; |
292 | ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame; |
293 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
294 | } |
295 | OMPT_STORE_RETURN_ADDRESS(gtid); |
296 | #endif |
297 | |
298 | #if INCLUDE_SSC_MARKS |
299 | SSC_MARK_FORKING(); |
300 | #endif |
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
302 | VOLATILE_CAST(microtask_t) microtask, // "wrapped" task |
303 | VOLATILE_CAST(launch_t) __kmp_invoke_task_func, |
304 | kmp_va_addr_of(ap)); |
305 | #if INCLUDE_SSC_MARKS |
306 | SSC_MARK_JOINING(); |
307 | #endif |
308 | __kmp_join_call(loc, gtid |
309 | #if OMPT_SUPPORT |
310 | , |
                    fork_context_intel
312 | #endif |
313 | ); |
314 | |
315 | va_end(ap); |
316 | |
317 | #if OMPT_SUPPORT |
318 | if (ompt_enabled.enabled) { |
319 | ompt_frame->enter_frame = ompt_data_none; |
320 | } |
321 | #endif |
322 | } |
323 | |
324 | #if KMP_STATS_ENABLED |
325 | if (previous_state == stats_state_e::SERIAL_REGION) { |
326 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); |
327 | KMP_SET_THREAD_STATE(previous_state); |
328 | } else { |
329 | KMP_POP_PARTITIONED_TIMER(); |
330 | } |
331 | #endif // KMP_STATS_ENABLED |
332 | } |
333 | |
334 | /*! |
335 | @ingroup PARALLEL |
336 | @param loc source location information |
337 | @param microtask pointer to callback routine consisting of outlined parallel |
338 | construct |
339 | @param cond condition for running in parallel |
340 | @param args struct of pointers to shared variables that aren't global |
341 | |
342 | Perform a fork only if the condition is true. |
343 | */ |
344 | void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, |
345 | kmp_int32 cond, void *args) { |
346 | int gtid = __kmp_entry_gtid(); |
347 | if (cond) { |
348 | if (args) |
349 | __kmpc_fork_call(loc, argc, microtask, args); |
350 | else |
351 | __kmpc_fork_call(loc, argc, microtask); |
352 | } else { |
    __kmpc_serialized_parallel(loc, gtid);
354 | |
355 | #if OMPT_SUPPORT |
356 | void *exit_frame_ptr; |
357 | #endif |
358 | |
359 | if (args) |
360 | __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, |
361 | /*npr=*/0, |
                             /*argc=*/1, &args
363 | #if OMPT_SUPPORT |
364 | , |
                             &exit_frame_ptr
366 | #endif |
367 | ); |
368 | else |
369 | __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, |
370 | /*npr=*/0, |
371 | /*argc=*/0, |
                             /*args=*/nullptr
373 | #if OMPT_SUPPORT |
374 | , |
                             &exit_frame_ptr
376 | #endif |
377 | ); |
378 | |
    __kmpc_end_serialized_parallel(loc, gtid);
380 | } |
381 | } |
382 | |
383 | /*! |
384 | @ingroup PARALLEL |
385 | @param loc source location information |
386 | @param global_tid global thread number |
387 | @param num_teams number of teams requested for the teams construct |
388 | @param num_threads number of threads per team requested for the teams construct |
389 | |
390 | Set the number of teams to be used by the teams construct. |
391 | This call is only required if the teams construct has a `num_teams` clause |
392 | or a `thread_limit` clause (or both). |
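
For illustration only (actual lowering is compiler dependent), a
`num_teams(8) thread_limit(16)` clause pair is typically turned into a call to
this function just before the teams fork, with @c loc, @c gtid and
@c outlined_teams_fn supplied by the compiler:
@code
__kmpc_push_num_teams(&loc, gtid, 8, 16);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_fn);
@endcode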
393 | */ |
394 | void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, |
395 | kmp_int32 num_teams, kmp_int32 num_threads) { |
396 | KA_TRACE(20, |
397 | ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n" , |
398 | global_tid, num_teams, num_threads)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
401 | } |
402 | |
403 | /*! |
404 | @ingroup PARALLEL |
405 | @param loc source location information |
406 | @param global_tid global thread number |
407 | @param thread_limit limit on number of threads which can be created within the |
408 | current task |
409 | |
Set the thread_limit for the current task.
This call is there to support the `thread_limit` clause on the `target` construct.
412 | */ |
413 | void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, |
414 | kmp_int32 thread_limit) { |
  __kmp_assert_valid_gtid(global_tid);
416 | kmp_info_t *thread = __kmp_threads[global_tid]; |
417 | if (thread_limit > 0) |
418 | thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit; |
419 | } |
420 | |
421 | /*! |
422 | @ingroup PARALLEL |
423 | @param loc source location information |
424 | @param global_tid global thread number |
425 | @param num_teams_lb lower bound on number of teams requested for the teams |
426 | construct |
427 | @param num_teams_ub upper bound on number of teams requested for the teams |
428 | construct |
429 | @param num_threads number of threads per team requested for the teams construct |
430 | |
431 | Set the number of teams to be used by the teams construct. The number of initial |
teams created will be greater than or equal to the lower bound and less than or
433 | equal to the upper bound. |
434 | This call is only required if the teams construct has a `num_teams` clause |
435 | or a `thread_limit` clause (or both). |
436 | */ |
437 | void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, |
438 | kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, |
439 | kmp_int32 num_threads) { |
440 | KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" |
441 | " num_teams_ub=%d num_threads=%d\n" , |
442 | global_tid, num_teams_lb, num_teams_ub, num_threads)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
445 | num_threads); |
446 | } |
447 | |
448 | /*! |
449 | @ingroup PARALLEL |
450 | @param loc source location information |
451 | @param argc total number of arguments in the ellipsis |
452 | @param microtask pointer to callback routine consisting of outlined teams |
453 | construct |
454 | @param ... pointers to shared variables that aren't global |
455 | |
456 | Do the actual fork and call the microtask in the relevant number of threads. |
457 | */ |
458 | void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, |
459 | ...) { |
460 | int gtid = __kmp_entry_gtid(); |
461 | kmp_info_t *this_thr = __kmp_threads[gtid]; |
462 | va_list ap; |
463 | va_start(ap, microtask); |
464 | |
465 | #if KMP_STATS_ENABLED |
466 | KMP_COUNT_BLOCK(OMP_TEAMS); |
467 | stats_state_e previous_state = KMP_GET_THREAD_STATE(); |
468 | if (previous_state == stats_state_e::SERIAL_REGION) { |
469 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); |
470 | } else { |
471 | KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); |
472 | } |
473 | #endif |
474 | |
475 | // remember teams entry point and nesting level |
476 | this_thr->th.th_teams_microtask = microtask; |
477 | this_thr->th.th_teams_level = |
478 | this_thr->th.th_team->t.t_level; // AC: can be >0 on host |
479 | |
480 | #if OMPT_SUPPORT |
481 | kmp_team_t *parent_team = this_thr->th.th_team; |
482 | int tid = __kmp_tid_from_gtid(gtid); |
483 | if (ompt_enabled.enabled) { |
484 | parent_team->t.t_implicit_task_taskdata[tid] |
485 | .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
486 | } |
487 | OMPT_STORE_RETURN_ADDRESS(gtid); |
488 | #endif |
489 | |
490 | // check if __kmpc_push_num_teams called, set default number of teams |
491 | // otherwise |
492 | if (this_thr->th.th_teams_size.nteams == 0) { |
    __kmp_push_num_teams(loc, gtid, 0, 0);
494 | } |
495 | KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); |
496 | KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); |
497 | KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); |
498 | |
499 | __kmp_fork_call( |
      loc, gtid, fork_context_intel, argc,
501 | VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task |
502 | VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); |
503 | __kmp_join_call(loc, gtid |
504 | #if OMPT_SUPPORT |
505 | , |
                  fork_context_intel
507 | #endif |
508 | ); |
509 | |
510 | // Pop current CG root off list |
511 | KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); |
512 | kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; |
513 | this_thr->th.th_cg_roots = tmp->up; |
514 | KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" |
515 | " to node %p. cg_nthreads was %d\n" , |
516 | this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); |
517 | KMP_DEBUG_ASSERT(tmp->cg_nthreads); |
518 | int i = tmp->cg_nthreads--; |
  if (i == 1) { // check if we are the last thread in CG (not always the case)
520 | __kmp_free(tmp); |
521 | } |
522 | // Restore current task's thread_limit from CG root |
523 | KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); |
524 | this_thr->th.th_current_task->td_icvs.thread_limit = |
525 | this_thr->th.th_cg_roots->cg_thread_limit; |
526 | |
527 | this_thr->th.th_teams_microtask = NULL; |
528 | this_thr->th.th_teams_level = 0; |
529 | *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; |
530 | va_end(ap); |
531 | #if KMP_STATS_ENABLED |
532 | if (previous_state == stats_state_e::SERIAL_REGION) { |
533 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); |
534 | KMP_SET_THREAD_STATE(previous_state); |
535 | } else { |
536 | KMP_POP_PARTITIONED_TIMER(); |
537 | } |
538 | #endif // KMP_STATS_ENABLED |
539 | } |
540 | |
541 | // I don't think this function should ever have been exported. |
542 | // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated |
543 | // openmp code ever called it, but it's been exported from the RTL for so |
544 | // long that I'm afraid to remove the definition. |
545 | int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } |
546 | |
547 | /*! |
548 | @ingroup PARALLEL |
549 | @param loc source location information |
550 | @param global_tid global thread number |
551 | |
552 | Enter a serialized parallel construct. This interface is used to handle a |
553 | conditional parallel region, like this, |
554 | @code |
555 | #pragma omp parallel if (condition) |
556 | @endcode |
557 | when the condition is false. |
558 | */ |
559 | void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { |
560 | // The implementation is now in kmp_runtime.cpp so that it can share static |
561 | // functions with kmp_fork_call since the tasks to be done are similar in |
562 | // each case. |
  __kmp_assert_valid_gtid(global_tid);
564 | #if OMPT_SUPPORT |
565 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
566 | #endif |
  __kmp_serialized_parallel(loc, global_tid);
568 | } |
569 | |
570 | /*! |
571 | @ingroup PARALLEL |
572 | @param loc source location information |
573 | @param global_tid global thread number |
574 | |
575 | Leave a serialized parallel construct. |
576 | */ |
577 | void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { |
578 | kmp_internal_control_t *top; |
579 | kmp_info_t *this_thr; |
580 | kmp_team_t *serial_team; |
581 | |
582 | KC_TRACE(10, |
583 | ("__kmpc_end_serialized_parallel: called by T#%d\n" , global_tid)); |
584 | |
585 | /* skip all this code for autopar serialized loops since it results in |
586 | unacceptable overhead */ |
587 | if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) |
588 | return; |
589 | |
590 | // Not autopar code |
  __kmp_assert_valid_gtid(global_tid);
592 | if (!TCR_4(__kmp_init_parallel)) |
593 | __kmp_parallel_initialize(); |
594 | |
595 | __kmp_resume_if_soft_paused(); |
596 | |
597 | this_thr = __kmp_threads[global_tid]; |
598 | serial_team = this_thr->th.th_serial_team; |
599 | |
600 | kmp_task_team_t *task_team = this_thr->th.th_task_team; |
601 | // we need to wait for the proxy tasks before finishing the thread |
602 | if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || |
603 | task_team->tt.tt_hidden_helper_task_encountered)) |
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
605 | |
606 | KMP_MB(); |
607 | KMP_DEBUG_ASSERT(serial_team); |
608 | KMP_ASSERT(serial_team->t.t_serialized); |
609 | KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); |
610 | KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); |
611 | KMP_DEBUG_ASSERT(serial_team->t.t_threads); |
612 | KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); |
613 | |
614 | #if OMPT_SUPPORT |
615 | if (ompt_enabled.enabled && |
616 | this_thr->th.ompt_thread_info.state != ompt_state_overhead) { |
617 | OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; |
618 | if (ompt_enabled.ompt_callback_implicit_task) { |
619 | ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( |
620 | ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, |
621 | OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); |
622 | } |
623 | |
    // reset/clear the task id only after unlinking the task
625 | ompt_data_t *parent_task_data; |
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
627 | |
628 | if (ompt_enabled.ompt_callback_parallel_end) { |
629 | ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( |
630 | &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, |
631 | ompt_parallel_invoker_program | ompt_parallel_team, |
632 | OMPT_LOAD_RETURN_ADDRESS(global_tid)); |
633 | } |
    __ompt_lw_taskteam_unlink(this_thr);
635 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; |
636 | } |
637 | #endif |
638 | |
639 | /* If necessary, pop the internal control stack values and replace the team |
640 | * values */ |
641 | top = serial_team->t.t_control_stack_top; |
642 | if (top && top->serial_nesting_level == serial_team->t.t_serialized) { |
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
644 | serial_team->t.t_control_stack_top = top->next; |
645 | __kmp_free(top); |
646 | } |
647 | |
648 | /* pop dispatch buffers stack */ |
649 | KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); |
650 | { |
651 | dispatch_private_info_t *disp_buffer = |
652 | serial_team->t.t_dispatch->th_disp_buffer; |
653 | serial_team->t.t_dispatch->th_disp_buffer = |
654 | serial_team->t.t_dispatch->th_disp_buffer->next; |
655 | __kmp_free(disp_buffer); |
656 | } |
657 | this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore |
658 | |
659 | --serial_team->t.t_serialized; |
660 | if (serial_team->t.t_serialized == 0) { |
661 | |
662 | /* return to the parallel section */ |
663 | |
664 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
665 | if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { |
666 | __kmp_clear_x87_fpu_status_word(); |
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
669 | } |
670 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
671 | |
672 | __kmp_pop_current_task_from_thread(this_thr); |
673 | #if OMPD_SUPPORT |
674 | if (ompd_state & OMPD_ENABLE_BP) |
675 | ompd_bp_parallel_end(); |
676 | #endif |
677 | |
678 | this_thr->th.th_team = serial_team->t.t_parent; |
679 | this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; |
680 | |
681 | /* restore values cached in the thread */ |
682 | this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ |
683 | this_thr->th.th_team_master = |
684 | serial_team->t.t_parent->t.t_threads[0]; /* JPH */ |
685 | this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; |
686 | |
687 | /* TODO the below shouldn't need to be adjusted for serialized teams */ |
688 | this_thr->th.th_dispatch = |
689 | &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; |
690 | |
691 | KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); |
692 | this_thr->th.th_current_task->td_flags.executing = 1; |
693 | |
694 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
695 | // Copy the task team from the new child / old parent team to the thread. |
696 | this_thr->th.th_task_team = |
697 | this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; |
698 | KA_TRACE(20, |
699 | ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " |
700 | "team %p\n" , |
701 | global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); |
702 | } |
703 | #if KMP_AFFINITY_SUPPORTED |
704 | if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) { |
      __kmp_reset_root_init_mask(global_tid);
706 | } |
707 | #endif |
708 | } else { |
709 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
710 | KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " |
711 | "depth of serial team %p to %d\n" , |
712 | global_tid, serial_team, serial_team->t.t_serialized)); |
713 | } |
714 | } |
715 | |
716 | serial_team->t.t_level--; |
717 | if (__kmp_env_consistency_check) |
    __kmp_pop_parallel(global_tid, NULL);
719 | #if OMPT_SUPPORT |
720 | if (ompt_enabled.enabled) |
721 | this_thr->th.ompt_thread_info.state = |
722 | ((this_thr->th.th_team_serialized) ? ompt_state_work_serial |
723 | : ompt_state_work_parallel); |
724 | #endif |
725 | } |
726 | |
727 | /*! |
728 | @ingroup SYNCHRONIZATION |
729 | @param loc source location information. |
730 | |
731 | Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though |
732 | depending on the memory ordering convention obeyed by the compiler |
733 | even that may not be necessary). |
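
In a typical lowering (a sketch only; details vary by compiler), an explicit
`#pragma omp flush` becomes a single call, with @c loc being the
compiler-generated source location descriptor:
@code
__kmpc_flush(&loc);
@endcode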
734 | */ |
735 | void __kmpc_flush(ident_t *loc) { |
736 | KC_TRACE(10, ("__kmpc_flush: called\n" )); |
737 | |
738 | /* need explicit __mf() here since use volatile instead in library */ |
739 | KMP_MFENCE(); /* Flush all pending memory write invalidates. */ |
740 | |
741 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
742 | if (ompt_enabled.ompt_callback_flush) { |
743 | ompt_callbacks.ompt_callback(ompt_callback_flush)( |
744 | __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); |
745 | } |
746 | #endif |
747 | } |
748 | |
749 | /* -------------------------------------------------------------------------- */ |
750 | /*! |
751 | @ingroup SYNCHRONIZATION |
752 | @param loc source location information |
753 | @param global_tid thread id. |
754 | |
755 | Execute a barrier. |
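
As a sketch of the usual lowering (compiler dependent), an explicit
`#pragma omp barrier` becomes
@code
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode
where @c loc is the compiler-generated ident_t for the directive.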
756 | */ |
757 | void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { |
758 | KMP_COUNT_BLOCK(OMP_BARRIER); |
759 | KC_TRACE(10, ("__kmpc_barrier: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
761 | |
762 | if (!TCR_4(__kmp_init_parallel)) |
763 | __kmp_parallel_initialize(); |
764 | |
765 | __kmp_resume_if_soft_paused(); |
766 | |
767 | if (__kmp_env_consistency_check) { |
768 | if (loc == 0) { |
769 | KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? |
770 | } |
    __kmp_check_barrier(global_tid, ct_barrier, loc);
772 | } |
773 | |
774 | #if OMPT_SUPPORT |
775 | ompt_frame_t *ompt_frame; |
776 | if (ompt_enabled.enabled) { |
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
778 | if (ompt_frame->enter_frame.ptr == NULL) |
779 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
780 | } |
781 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
782 | #endif |
783 | __kmp_threads[global_tid]->th.th_ident = loc; |
784 | // TODO: explicit barrier_wait_id: |
785 | // this function is called when 'barrier' directive is present or |
786 | // implicit barrier at the end of a worksharing construct. |
787 | // 1) better to add a per-thread barrier counter to a thread data structure |
788 | // 2) set to 0 when a new team is created |
789 | // 4) no sync is required |
790 | |
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
792 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
793 | if (ompt_enabled.enabled) { |
794 | ompt_frame->enter_frame = ompt_data_none; |
795 | } |
796 | #endif |
797 | } |
798 | |
799 | /* The BARRIER for a MASTER section is always explicit */ |
800 | /*! |
801 | @ingroup WORK_SHARING |
802 | @param loc source location information. |
803 | @param global_tid global thread number . |
804 | @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. |
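
A typical (compiler-dependent) lowering pairs this call with
__kmpc_end_master() on the thread that takes the branch:
@code
if (__kmpc_master(&loc, gtid)) {
  // body of the master region
  __kmpc_end_master(&loc, gtid);
}
@endcode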
805 | */ |
806 | kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { |
807 | int status = 0; |
808 | |
809 | KC_TRACE(10, ("__kmpc_master: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
811 | |
812 | if (!TCR_4(__kmp_init_parallel)) |
813 | __kmp_parallel_initialize(); |
814 | |
815 | __kmp_resume_if_soft_paused(); |
816 | |
817 | if (KMP_MASTER_GTID(global_tid)) { |
818 | KMP_COUNT_BLOCK(OMP_MASTER); |
819 | KMP_PUSH_PARTITIONED_TIMER(OMP_master); |
820 | status = 1; |
821 | } |
822 | |
823 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
824 | if (status) { |
825 | if (ompt_enabled.ompt_callback_masked) { |
826 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
827 | kmp_team_t *team = this_thr->th.th_team; |
828 | |
      int tid = __kmp_tid_from_gtid(global_tid);
830 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
831 | ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), |
832 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
833 | OMPT_GET_RETURN_ADDRESS(0)); |
834 | } |
835 | } |
836 | #endif |
837 | |
838 | if (__kmp_env_consistency_check) { |
839 | #if KMP_USE_DYNAMIC_LOCK |
840 | if (status) |
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
844 | #else |
845 | if (status) |
846 | __kmp_push_sync(global_tid, ct_master, loc, NULL); |
847 | else |
848 | __kmp_check_sync(global_tid, ct_master, loc, NULL); |
849 | #endif |
850 | } |
851 | |
852 | return status; |
853 | } |
854 | |
855 | /*! |
856 | @ingroup WORK_SHARING |
857 | @param loc source location information. |
858 | @param global_tid global thread number . |
859 | |
860 | Mark the end of a <tt>master</tt> region. This should only be called by the |
861 | thread that executes the <tt>master</tt> region. |
862 | */ |
863 | void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { |
864 | KC_TRACE(10, ("__kmpc_end_master: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
866 | KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); |
867 | KMP_POP_PARTITIONED_TIMER(); |
868 | |
869 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
870 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
871 | kmp_team_t *team = this_thr->th.th_team; |
872 | if (ompt_enabled.ompt_callback_masked) { |
    int tid = __kmp_tid_from_gtid(global_tid);
874 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
875 | ompt_scope_end, &(team->t.ompt_team_info.parallel_data), |
876 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
877 | OMPT_GET_RETURN_ADDRESS(0)); |
878 | } |
879 | #endif |
880 | |
881 | if (__kmp_env_consistency_check) { |
882 | if (KMP_MASTER_GTID(global_tid)) |
      __kmp_pop_sync(global_tid, ct_master, loc);
884 | } |
885 | } |
886 | |
887 | /*! |
888 | @ingroup WORK_SHARING |
889 | @param loc source location information. |
890 | @param global_tid global thread number. |
891 | @param filter result of evaluating filter clause on thread global_tid, or zero |
892 | if no filter clause present |
893 | @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. |
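
As an illustrative, compiler-dependent sketch, a <tt>masked filter(expr)</tt>
region becomes a guarded call pair with the evaluated filter expression passed
in:
@code
if (__kmpc_masked(&loc, gtid, filter_expr)) {
  // body of the masked region
  __kmpc_end_masked(&loc, gtid);
}
@endcode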
894 | */ |
895 | kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { |
896 | int status = 0; |
897 | int tid; |
898 | KC_TRACE(10, ("__kmpc_masked: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
900 | |
901 | if (!TCR_4(__kmp_init_parallel)) |
902 | __kmp_parallel_initialize(); |
903 | |
904 | __kmp_resume_if_soft_paused(); |
905 | |
  tid = __kmp_tid_from_gtid(global_tid);
907 | if (tid == filter) { |
908 | KMP_COUNT_BLOCK(OMP_MASKED); |
909 | KMP_PUSH_PARTITIONED_TIMER(OMP_masked); |
910 | status = 1; |
911 | } |
912 | |
913 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
914 | if (status) { |
915 | if (ompt_enabled.ompt_callback_masked) { |
916 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
917 | kmp_team_t *team = this_thr->th.th_team; |
918 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
919 | ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), |
920 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
921 | OMPT_GET_RETURN_ADDRESS(0)); |
922 | } |
923 | } |
924 | #endif |
925 | |
926 | if (__kmp_env_consistency_check) { |
927 | #if KMP_USE_DYNAMIC_LOCK |
928 | if (status) |
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
932 | #else |
933 | if (status) |
934 | __kmp_push_sync(global_tid, ct_masked, loc, NULL); |
935 | else |
936 | __kmp_check_sync(global_tid, ct_masked, loc, NULL); |
937 | #endif |
938 | } |
939 | |
940 | return status; |
941 | } |
942 | |
943 | /*! |
944 | @ingroup WORK_SHARING |
945 | @param loc source location information. |
946 | @param global_tid global thread number . |
947 | |
948 | Mark the end of a <tt>masked</tt> region. This should only be called by the |
949 | thread that executes the <tt>masked</tt> region. |
950 | */ |
951 | void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { |
952 | KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
954 | KMP_POP_PARTITIONED_TIMER(); |
955 | |
956 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
957 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
958 | kmp_team_t *team = this_thr->th.th_team; |
959 | if (ompt_enabled.ompt_callback_masked) { |
    int tid = __kmp_tid_from_gtid(global_tid);
961 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
962 | ompt_scope_end, &(team->t.ompt_team_info.parallel_data), |
963 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
964 | OMPT_GET_RETURN_ADDRESS(0)); |
965 | } |
966 | #endif |
967 | |
968 | if (__kmp_env_consistency_check) { |
    __kmp_pop_sync(global_tid, ct_masked, loc);
970 | } |
971 | } |
972 | |
973 | /*! |
974 | @ingroup WORK_SHARING |
975 | @param loc source location information. |
976 | @param gtid global thread number. |
977 | |
978 | Start execution of an <tt>ordered</tt> construct. |
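
In a typical lowering (a sketch; details vary by compiler), the body of an
<tt>ordered</tt> region inside a loop chunk is bracketed by this call and
__kmpc_end_ordered():
@code
__kmpc_ordered(&loc, gtid);
// code that must execute in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode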
979 | */ |
980 | void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { |
981 | int cid = 0; |
982 | kmp_info_t *th; |
983 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
984 | |
985 | KC_TRACE(10, ("__kmpc_ordered: called T#%d\n" , gtid)); |
986 | __kmp_assert_valid_gtid(gtid); |
987 | |
988 | if (!TCR_4(__kmp_init_parallel)) |
989 | __kmp_parallel_initialize(); |
990 | |
991 | __kmp_resume_if_soft_paused(); |
992 | |
993 | #if USE_ITT_BUILD |
994 | __kmp_itt_ordered_prep(gtid); |
995 | // TODO: ordered_wait_id |
996 | #endif /* USE_ITT_BUILD */ |
997 | |
998 | th = __kmp_threads[gtid]; |
999 | |
1000 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1001 | kmp_team_t *team; |
1002 | ompt_wait_id_t lck; |
1003 | void *codeptr_ra; |
1004 | OMPT_STORE_RETURN_ADDRESS(gtid); |
1005 | if (ompt_enabled.enabled) { |
1006 | team = __kmp_team_from_gtid(gtid); |
1007 | lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; |
1008 | /* OMPT state update */ |
1009 | th->th.ompt_thread_info.wait_id = lck; |
1010 | th->th.ompt_thread_info.state = ompt_state_wait_ordered; |
1011 | |
1012 | /* OMPT event callback */ |
1013 | codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); |
1014 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1015 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1016 | ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck, |
1017 | codeptr_ra); |
1018 | } |
1019 | } |
1020 | #endif |
1021 | |
1022 | if (th->th.th_dispatch->th_deo_fcn != 0) |
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);
1026 | |
1027 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1028 | if (ompt_enabled.enabled) { |
1029 | /* OMPT state update */ |
1030 | th->th.ompt_thread_info.state = ompt_state_work_parallel; |
1031 | th->th.ompt_thread_info.wait_id = 0; |
1032 | |
1033 | /* OMPT event callback */ |
1034 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
1035 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
1036 | ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); |
1037 | } |
1038 | } |
1039 | #endif |
1040 | |
1041 | #if USE_ITT_BUILD |
1042 | __kmp_itt_ordered_start(gtid); |
1043 | #endif /* USE_ITT_BUILD */ |
1044 | } |
1045 | |
1046 | /*! |
1047 | @ingroup WORK_SHARING |
1048 | @param loc source location information. |
1049 | @param gtid global thread number. |
1050 | |
1051 | End execution of an <tt>ordered</tt> construct. |
1052 | */ |
1053 | void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { |
1054 | int cid = 0; |
1055 | kmp_info_t *th; |
1056 | |
1057 | KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n" , gtid)); |
1058 | __kmp_assert_valid_gtid(gtid); |
1059 | |
1060 | #if USE_ITT_BUILD |
1061 | __kmp_itt_ordered_end(gtid); |
1062 | // TODO: ordered_wait_id |
1063 | #endif /* USE_ITT_BUILD */ |
1064 | |
1065 | th = __kmp_threads[gtid]; |
1066 | |
1067 | if (th->th.th_dispatch->th_dxo_fcn != 0) |
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);
1071 | |
1072 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1073 | OMPT_STORE_RETURN_ADDRESS(gtid); |
1074 | if (ompt_enabled.ompt_callback_mutex_released) { |
1075 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
1076 | ompt_mutex_ordered, |
1077 | (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid) |
1078 | ->t.t_ordered.dt.t_value, |
1079 | OMPT_LOAD_RETURN_ADDRESS(gtid)); |
1080 | } |
1081 | #endif |
1082 | } |
1083 | |
1084 | #if KMP_USE_DYNAMIC_LOCK |
1085 | |
1086 | static __forceinline void |
1087 | __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, |
1088 | kmp_int32 gtid, kmp_indirect_locktag_t tag) { |
1089 | // Pointer to the allocated indirect lock is written to crit, while indexing |
1090 | // is ignored. |
1091 | void *idx; |
1092 | kmp_indirect_lock_t **lck; |
1093 | lck = (kmp_indirect_lock_t **)crit; |
1094 | kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); |
1095 | KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); |
1096 | KMP_SET_I_LOCK_LOCATION(ilk, loc); |
1097 | KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); |
1098 | KA_TRACE(20, |
1099 | ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n" , tag)); |
1100 | #if USE_ITT_BUILD |
  __kmp_itt_critical_creating(ilk->lock, loc);
1102 | #endif |
1103 | int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); |
1104 | if (status == 0) { |
1105 | #if USE_ITT_BUILD |
    __kmp_itt_critical_destroyed(ilk->lock);
1107 | #endif |
1108 | // We don't really need to destroy the unclaimed lock here since it will be |
1109 | // cleaned up at program exit. |
1110 | // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); |
1111 | } |
1112 | KMP_DEBUG_ASSERT(*lck != NULL); |
1113 | } |
1114 | |
1115 | // Fast-path acquire tas lock |
1116 | #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ |
1117 | { \ |
1118 | kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ |
1119 | kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ |
1120 | kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ |
1121 | if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ |
1122 | !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ |
1123 | kmp_uint32 spins; \ |
1124 | KMP_FSYNC_PREPARE(l); \ |
1125 | KMP_INIT_YIELD(spins); \ |
1126 | kmp_backoff_t backoff = __kmp_spin_backoff_params; \ |
1127 | do { \ |
1128 | if (TCR_4(__kmp_nth) > \ |
1129 | (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ |
1130 | KMP_YIELD(TRUE); \ |
1131 | } else { \ |
1132 | KMP_YIELD_SPIN(spins); \ |
1133 | } \ |
1134 | __kmp_spin_backoff(&backoff); \ |
1135 | } while ( \ |
1136 | KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ |
1137 | !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ |
1138 | } \ |
1139 | KMP_FSYNC_ACQUIRED(l); \ |
1140 | } |
1141 | |
1142 | // Fast-path test tas lock |
1143 | #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ |
1144 | { \ |
1145 | kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ |
1146 | kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ |
1147 | kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ |
1148 | rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ |
1149 | __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ |
1150 | } |
1151 | |
1152 | // Fast-path release tas lock |
1153 | #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ |
1154 | { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } |
1155 | |
1156 | #if KMP_USE_FUTEX |
1157 | |
1158 | #include <sys/syscall.h> |
1159 | #include <unistd.h> |
1160 | #ifndef FUTEX_WAIT |
1161 | #define FUTEX_WAIT 0 |
1162 | #endif |
1163 | #ifndef FUTEX_WAKE |
1164 | #define FUTEX_WAKE 1 |
1165 | #endif |
1166 | |
1167 | // Fast-path acquire futex lock |
1168 | #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ |
1169 | { \ |
1170 | kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ |
1171 | kmp_int32 gtid_code = (gtid + 1) << 1; \ |
1172 | KMP_MB(); \ |
1173 | KMP_FSYNC_PREPARE(ftx); \ |
1174 | kmp_int32 poll_val; \ |
1175 | while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ |
1176 | &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ |
1177 | KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ |
1178 | kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ |
1179 | if (!cond) { \ |
1180 | if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ |
1181 | poll_val | \ |
1182 | KMP_LOCK_BUSY(1, futex))) { \ |
1183 | continue; \ |
1184 | } \ |
1185 | poll_val |= KMP_LOCK_BUSY(1, futex); \ |
1186 | } \ |
1187 | kmp_int32 rc; \ |
1188 | if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ |
1189 | NULL, NULL, 0)) != 0) { \ |
1190 | continue; \ |
1191 | } \ |
1192 | gtid_code |= 1; \ |
1193 | } \ |
1194 | KMP_FSYNC_ACQUIRED(ftx); \ |
1195 | } |
1196 | |
1197 | // Fast-path test futex lock |
1198 | #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ |
1199 | { \ |
1200 | kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ |
1201 | if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ |
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1203 | KMP_FSYNC_ACQUIRED(ftx); \ |
1204 | rc = TRUE; \ |
1205 | } else { \ |
1206 | rc = FALSE; \ |
1207 | } \ |
1208 | } |
1209 | |
1210 | // Fast-path release futex lock |
1211 | #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ |
1212 | { \ |
1213 | kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ |
1214 | KMP_MB(); \ |
1215 | KMP_FSYNC_RELEASING(ftx); \ |
1216 | kmp_int32 poll_val = \ |
1217 | KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ |
1218 | if (KMP_LOCK_STRIP(poll_val) & 1) { \ |
1219 | syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ |
1220 | KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ |
1221 | } \ |
1222 | KMP_MB(); \ |
1223 | KMP_YIELD_OVERSUB(); \ |
1224 | } |
1225 | |
1226 | #endif // KMP_USE_FUTEX |
1227 | |
1228 | #else // KMP_USE_DYNAMIC_LOCK |
1229 | |
1230 | static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, |
1231 | ident_t const *loc, |
1232 | kmp_int32 gtid) { |
1233 | kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; |
1234 | |
1235 | // Because of the double-check, the following load doesn't need to be volatile |
1236 | kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); |
1237 | |
1238 | if (lck == NULL) { |
1239 | void *idx; |
1240 | |
1241 | // Allocate & initialize the lock. |
1242 | // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() |
1243 | lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); |
1244 | __kmp_init_user_lock_with_checks(lck); |
1245 | __kmp_set_user_lock_location(lck, loc); |
1246 | #if USE_ITT_BUILD |
1247 | __kmp_itt_critical_creating(lck); |
  // __kmp_itt_critical_creating() should be called *before* the first usage
  // of the underlying lock. It is the only place where we can guarantee it.
  // There is a chance the lock will be destroyed with no usage, but that is not
  // a problem, because this is not a real event seen by the user but rather
  // setting a name for the object (lock). See more details in kmp_itt.h.
1253 | #endif /* USE_ITT_BUILD */ |
1254 | |
1255 | // Use a cmpxchg instruction to slam the start of the critical section with |
1256 | // the lock pointer. If another thread beat us to it, deallocate the lock, |
1257 | // and use the lock that the other thread allocated. |
1258 | int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); |
1259 | |
1260 | if (status == 0) { |
1261 | // Deallocate the lock and reload the value. |
1262 | #if USE_ITT_BUILD |
1263 | __kmp_itt_critical_destroyed(lck); |
1264 | // Let ITT know the lock is destroyed and the same memory location may be reused |
1265 | // for another purpose. |
1266 | #endif /* USE_ITT_BUILD */ |
1267 | __kmp_destroy_user_lock_with_checks(lck); |
1268 | __kmp_user_lock_free(&idx, gtid, lck); |
1269 | lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); |
1270 | KMP_DEBUG_ASSERT(lck != NULL); |
1271 | } |
1272 | } |
1273 | return lck; |
1274 | } |
1275 | |
1276 | #endif // KMP_USE_DYNAMIC_LOCK |
1277 | |
1278 | /*! |
1279 | @ingroup WORK_SHARING |
1280 | @param loc source location information. |
1281 | @param global_tid global thread number. |
1282 | @param crit identity of the critical section. This could be a pointer to a lock |
1283 | associated with the critical section, or some other suitably unique value. |
1284 | |
1285 | Enter code protected by a `critical` construct. |
1286 | This function blocks until the executing thread can enter the critical section. |
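
As a sketch of the usual compiler-generated pattern (illustrative names), the
lock word is a zero-initialized kmp_critical_name shared by all uses of the
same critical name, and this call is paired with __kmpc_end_critical():
@code
static kmp_critical_name crit_lock; // zero-initialized
__kmpc_critical(&loc, gtid, &crit_lock);
// body of the critical region
__kmpc_end_critical(&loc, gtid, &crit_lock);
@endcode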
1287 | */ |
1288 | void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, |
1289 | kmp_critical_name *crit) { |
1290 | #if KMP_USE_DYNAMIC_LOCK |
1291 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1292 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1293 | #endif // OMPT_SUPPORT |
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1295 | #else |
1296 | KMP_COUNT_BLOCK(OMP_CRITICAL); |
1297 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1298 | ompt_state_t prev_state = ompt_state_undefined; |
1299 | ompt_thread_info_t ti; |
1300 | #endif |
1301 | kmp_user_lock_p lck; |
1302 | |
1303 | KC_TRACE(10, ("__kmpc_critical: called T#%d\n" , global_tid)); |
1304 | __kmp_assert_valid_gtid(global_tid); |
1305 | |
1306 | // TODO: add THR_OVHD_STATE |
1307 | |
1308 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); |
1309 | KMP_CHECK_USER_LOCK_INIT(); |
1310 | |
1311 | if ((__kmp_user_lock_kind == lk_tas) && |
1312 | (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1313 | lck = (kmp_user_lock_p)crit; |
1314 | } |
1315 | #if KMP_USE_FUTEX |
1316 | else if ((__kmp_user_lock_kind == lk_futex) && |
1317 | (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1318 | lck = (kmp_user_lock_p)crit; |
1319 | } |
1320 | #endif |
1321 | else { // ticket, queuing or drdpa |
1322 | lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); |
1323 | } |
1324 | |
1325 | if (__kmp_env_consistency_check) |
1326 | __kmp_push_sync(global_tid, ct_critical, loc, lck); |
1327 | |
  // Since the critical directive binds to all threads, not just the current
  // team, we have to check this even if we are in a serialized team.
  // Also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.
1332 | |
1333 | #if USE_ITT_BUILD |
1334 | __kmp_itt_critical_acquiring(lck); |
1335 | #endif /* USE_ITT_BUILD */ |
1336 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
  OMPT_STORE_RETURN_ADDRESS(global_tid);
1338 | void *codeptr_ra = NULL; |
1339 | if (ompt_enabled.enabled) { |
1340 | ti = __kmp_threads[global_tid]->th.ompt_thread_info; |
1341 | /* OMPT state update */ |
1342 | prev_state = ti.state; |
1343 | ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; |
1344 | ti.state = ompt_state_wait_critical; |
1345 | |
1346 | /* OMPT event callback */ |
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1348 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1349 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1350 | ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
1351 | (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); |
1352 | } |
1353 | } |
1354 | #endif |
1355 | // Value of 'crit' should be good for using as a critical_id of the critical |
1356 | // section directive. |
1357 | __kmp_acquire_user_lock_with_checks(lck, global_tid); |
1358 | |
1359 | #if USE_ITT_BUILD |
1360 | __kmp_itt_critical_acquired(lck); |
1361 | #endif /* USE_ITT_BUILD */ |
1362 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1363 | if (ompt_enabled.enabled) { |
1364 | /* OMPT state update */ |
1365 | ti.state = prev_state; |
1366 | ti.wait_id = 0; |
1367 | |
1368 | /* OMPT event callback */ |
1369 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
1370 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
1371 | ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); |
1372 | } |
1373 | } |
1374 | #endif |
1375 | KMP_POP_PARTITIONED_TIMER(); |
1376 | |
1377 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical); |
1378 | KA_TRACE(15, ("__kmpc_critical: done T#%d\n" , global_tid)); |
1379 | #endif // KMP_USE_DYNAMIC_LOCK |
1380 | } |
1381 | |
1382 | #if KMP_USE_DYNAMIC_LOCK |
1383 | |
1384 | // Converts the given hint to an internal lock implementation |
1385 | static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { |
1386 | #if KMP_USE_TSX |
1387 | #define KMP_TSX_LOCK(seq) lockseq_##seq |
1388 | #else |
1389 | #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq |
1390 | #endif |
1391 | |
1392 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1393 | #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm) |
1394 | #else |
1395 | #define KMP_CPUINFO_RTM 0 |
1396 | #endif |
1397 | |
1398 | // Hints that do not require further logic |
1399 | if (hint & kmp_lock_hint_hle) |
1400 | return KMP_TSX_LOCK(hle); |
1401 | if (hint & kmp_lock_hint_rtm) |
1402 | return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; |
1403 | if (hint & kmp_lock_hint_adaptive) |
1404 | return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; |
1405 | |
1406 | // Rule out conflicting hints first by returning the default lock |
1407 | if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) |
1408 | return __kmp_user_lock_seq; |
1409 | if ((hint & omp_lock_hint_speculative) && |
1410 | (hint & omp_lock_hint_nonspeculative)) |
1411 | return __kmp_user_lock_seq; |
1412 | |
1413 | // Do not even consider speculation when it appears to be contended |
1414 | if (hint & omp_lock_hint_contended) |
1415 | return lockseq_queuing; |
1416 | |
1417 | // Uncontended lock without speculation |
1418 | if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) |
1419 | return lockseq_tas; |
1420 | |
1421 | // Use RTM lock for speculation |
1422 | if (hint & omp_lock_hint_speculative) |
1423 | return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; |
1424 | |
1425 | return __kmp_user_lock_seq; |
1426 | } |
1427 | |
1428 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1429 | #if KMP_USE_DYNAMIC_LOCK |
1430 | static kmp_mutex_impl_t |
1431 | __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { |
1432 | if (user_lock) { |
1433 | switch (KMP_EXTRACT_D_TAG(user_lock)) { |
1434 | case 0: |
1435 | break; |
1436 | #if KMP_USE_FUTEX |
1437 | case locktag_futex: |
1438 | return kmp_mutex_impl_queuing; |
1439 | #endif |
1440 | case locktag_tas: |
1441 | return kmp_mutex_impl_spin; |
1442 | #if KMP_USE_TSX |
1443 | case locktag_hle: |
1444 | case locktag_rtm_spin: |
1445 | return kmp_mutex_impl_speculative; |
1446 | #endif |
1447 | default: |
1448 | return kmp_mutex_impl_none; |
1449 | } |
1450 | ilock = KMP_LOOKUP_I_LOCK(user_lock); |
1451 | } |
1452 | KMP_ASSERT(ilock); |
1453 | switch (ilock->type) { |
1454 | #if KMP_USE_TSX |
1455 | case locktag_adaptive: |
1456 | case locktag_rtm_queuing: |
1457 | return kmp_mutex_impl_speculative; |
1458 | #endif |
1459 | case locktag_nested_tas: |
1460 | return kmp_mutex_impl_spin; |
1461 | #if KMP_USE_FUTEX |
1462 | case locktag_nested_futex: |
1463 | #endif |
1464 | case locktag_ticket: |
1465 | case locktag_queuing: |
1466 | case locktag_drdpa: |
1467 | case locktag_nested_ticket: |
1468 | case locktag_nested_queuing: |
1469 | case locktag_nested_drdpa: |
1470 | return kmp_mutex_impl_queuing; |
1471 | default: |
1472 | return kmp_mutex_impl_none; |
1473 | } |
1474 | } |
1475 | #else |
1476 | // For locks without dynamic binding |
1477 | static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { |
1478 | switch (__kmp_user_lock_kind) { |
1479 | case lk_tas: |
1480 | return kmp_mutex_impl_spin; |
1481 | #if KMP_USE_FUTEX |
1482 | case lk_futex: |
1483 | #endif |
1484 | case lk_ticket: |
1485 | case lk_queuing: |
1486 | case lk_drdpa: |
1487 | return kmp_mutex_impl_queuing; |
1488 | #if KMP_USE_TSX |
1489 | case lk_hle: |
1490 | case lk_rtm_queuing: |
1491 | case lk_rtm_spin: |
1492 | case lk_adaptive: |
1493 | return kmp_mutex_impl_speculative; |
1494 | #endif |
1495 | default: |
1496 | return kmp_mutex_impl_none; |
1497 | } |
1498 | } |
1499 | #endif // KMP_USE_DYNAMIC_LOCK |
1500 | #endif // OMPT_SUPPORT && OMPT_OPTIONAL |
1501 | |
1502 | /*! |
1503 | @ingroup WORK_SHARING |
1504 | @param loc source location information. |
1505 | @param global_tid global thread number. |
1506 | @param crit identity of the critical section. This could be a pointer to a lock |
1507 | associated with the critical section, or some other suitably unique value. |
1508 | @param hint the lock hint. |
1509 | |
1510 | Enter code protected by a `critical` construct with a hint. The hint value is |
1511 | used to suggest a lock implementation. This function blocks until the executing |
1512 | thread can enter the critical section unless the hint suggests use of |
1513 | speculative execution and the hardware supports it. |
1514 | */ |
1515 | void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, |
1516 | kmp_critical_name *crit, uint32_t hint) { |
1517 | KMP_COUNT_BLOCK(OMP_CRITICAL); |
1518 | kmp_user_lock_p lck; |
1519 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1520 | ompt_state_t prev_state = ompt_state_undefined; |
1521 | ompt_thread_info_t ti; |
1522 | // This is the case, if called from __kmpc_critical: |
1523 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); |
1524 | if (!codeptr) |
1525 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
1526 | #endif |
1527 | |
1528 | KC_TRACE(10, ("__kmpc_critical: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
1530 | |
1531 | kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; |
1532 | // Check if it is initialized. |
1533 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); |
1534 | kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); |
1535 | if (*lk == 0) { |
1536 | if (KMP_IS_D_LOCK(lockseq)) { |
1537 | KMP_COMPARE_AND_STORE_ACQ32( |
1538 | (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, |
1539 | KMP_GET_D_TAG(lockseq)); |
1540 | } else { |
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1542 | } |
1543 | } |
1544 | // Branch for accessing the actual lock object and set operation. This |
1545 | // branching is inevitable since this lock initialization does not follow the |
1546 | // normal dispatch path (lock table is not used). |
1547 | if (KMP_EXTRACT_D_TAG(lk) != 0) { |
1548 | lck = (kmp_user_lock_p)lk; |
1549 | if (__kmp_env_consistency_check) { |
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
1552 | } |
1553 | #if USE_ITT_BUILD |
    __kmp_itt_critical_acquiring(lck);
1555 | #endif |
1556 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1557 | if (ompt_enabled.enabled) { |
1558 | ti = __kmp_threads[global_tid]->th.ompt_thread_info; |
1559 | /* OMPT state update */ |
1560 | prev_state = ti.state; |
1561 | ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; |
1562 | ti.state = ompt_state_wait_critical; |
1563 | |
1564 | /* OMPT event callback */ |
1565 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1566 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1567 | ompt_mutex_critical, (unsigned int)hint, |
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1569 | codeptr); |
1570 | } |
1571 | } |
1572 | #endif |
1573 | #if KMP_USE_INLINED_TAS |
1574 | if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { |
1575 | KMP_ACQUIRE_TAS_LOCK(lck, global_tid); |
1576 | } else |
1577 | #elif KMP_USE_INLINED_FUTEX |
1578 | if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { |
1579 | KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); |
1580 | } else |
1581 | #endif |
1582 | { |
1583 | KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); |
1584 | } |
1585 | } else { |
1586 | kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); |
1587 | lck = ilk->lock; |
1588 | if (__kmp_env_consistency_check) { |
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
1591 | } |
1592 | #if USE_ITT_BUILD |
    __kmp_itt_critical_acquiring(lck);
1594 | #endif |
1595 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1596 | if (ompt_enabled.enabled) { |
1597 | ti = __kmp_threads[global_tid]->th.ompt_thread_info; |
1598 | /* OMPT state update */ |
1599 | prev_state = ti.state; |
1600 | ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; |
1601 | ti.state = ompt_state_wait_critical; |
1602 | |
1603 | /* OMPT event callback */ |
1604 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1605 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1606 | ompt_mutex_critical, (unsigned int)hint, |
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1608 | codeptr); |
1609 | } |
1610 | } |
1611 | #endif |
1612 | KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); |
1613 | } |
1614 | KMP_POP_PARTITIONED_TIMER(); |
1615 | |
1616 | #if USE_ITT_BUILD |
  __kmp_itt_critical_acquired(lck);
1618 | #endif /* USE_ITT_BUILD */ |
1619 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1620 | if (ompt_enabled.enabled) { |
1621 | /* OMPT state update */ |
1622 | ti.state = prev_state; |
1623 | ti.wait_id = 0; |
1624 | |
1625 | /* OMPT event callback */ |
1626 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
1627 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
1628 | ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
1629 | } |
1630 | } |
1631 | #endif |
1632 | |
1633 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical); |
1634 | KA_TRACE(15, ("__kmpc_critical: done T#%d\n" , global_tid)); |
1635 | } // __kmpc_critical_with_hint |
1636 | |
1637 | #endif // KMP_USE_DYNAMIC_LOCK |
1638 | |
1639 | /*! |
1640 | @ingroup WORK_SHARING |
1641 | @param loc source location information. |
@param global_tid global thread number.
1643 | @param crit identity of the critical section. This could be a pointer to a lock |
1644 | associated with the critical section, or some other suitably unique value. |
1645 | |
1646 | Leave a critical section, releasing any lock that was held during its execution. |
1647 | */ |
1648 | void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, |
1649 | kmp_critical_name *crit) { |
1650 | kmp_user_lock_p lck; |
1651 | |
1652 | KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n" , global_tid)); |
1653 | |
1654 | #if KMP_USE_DYNAMIC_LOCK |
1655 | int locktag = KMP_EXTRACT_D_TAG(crit); |
1656 | if (locktag) { |
1657 | lck = (kmp_user_lock_p)crit; |
1658 | KMP_ASSERT(lck != NULL); |
1659 | if (__kmp_env_consistency_check) { |
      __kmp_pop_sync(global_tid, ct_critical, loc);
1661 | } |
1662 | #if USE_ITT_BUILD |
    __kmp_itt_critical_releasing(lck);
1664 | #endif |
1665 | #if KMP_USE_INLINED_TAS |
1666 | if (locktag == locktag_tas && !__kmp_env_consistency_check) { |
1667 | KMP_RELEASE_TAS_LOCK(lck, global_tid); |
1668 | } else |
1669 | #elif KMP_USE_INLINED_FUTEX |
1670 | if (locktag == locktag_futex && !__kmp_env_consistency_check) { |
1671 | KMP_RELEASE_FUTEX_LOCK(lck, global_tid); |
1672 | } else |
1673 | #endif |
1674 | { |
1675 | KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); |
1676 | } |
1677 | } else { |
1678 | kmp_indirect_lock_t *ilk = |
1679 | (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); |
1680 | KMP_ASSERT(ilk != NULL); |
1681 | lck = ilk->lock; |
1682 | if (__kmp_env_consistency_check) { |
      __kmp_pop_sync(global_tid, ct_critical, loc);
1684 | } |
1685 | #if USE_ITT_BUILD |
    __kmp_itt_critical_releasing(lck);
1687 | #endif |
1688 | KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); |
1689 | } |
1690 | |
1691 | #else // KMP_USE_DYNAMIC_LOCK |
1692 | |
1693 | if ((__kmp_user_lock_kind == lk_tas) && |
1694 | (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1695 | lck = (kmp_user_lock_p)crit; |
1696 | } |
1697 | #if KMP_USE_FUTEX |
1698 | else if ((__kmp_user_lock_kind == lk_futex) && |
1699 | (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1700 | lck = (kmp_user_lock_p)crit; |
1701 | } |
1702 | #endif |
1703 | else { // ticket, queuing or drdpa |
1704 | lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); |
1705 | } |
1706 | |
1707 | KMP_ASSERT(lck != NULL); |
1708 | |
1709 | if (__kmp_env_consistency_check) |
1710 | __kmp_pop_sync(global_tid, ct_critical, loc); |
1711 | |
1712 | #if USE_ITT_BUILD |
1713 | __kmp_itt_critical_releasing(lck); |
1714 | #endif /* USE_ITT_BUILD */ |
1715 | // Value of 'crit' should be good for using as a critical_id of the critical |
1716 | // section directive. |
1717 | __kmp_release_user_lock_with_checks(lck, global_tid); |
1718 | |
1719 | #endif // KMP_USE_DYNAMIC_LOCK |
1720 | |
1721 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1722 | /* OMPT release event triggers after lock is released; place here to trigger |
1723 | * for all #if branches */ |
1724 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1725 | if (ompt_enabled.ompt_callback_mutex_released) { |
1726 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
1727 | ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, |
1728 | OMPT_LOAD_RETURN_ADDRESS(0)); |
1729 | } |
1730 | #endif |
1731 | |
1732 | KMP_POP_PARTITIONED_TIMER(); |
1733 | KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n" , global_tid)); |
1734 | } |
1735 | |
1736 | /*! |
1737 | @ingroup SYNCHRONIZATION |
1738 | @param loc source location information |
1739 | @param global_tid thread id. |
1740 | @return one if the thread should execute the master block, zero otherwise |
1741 | |
1742 | Start execution of a combined barrier and master. The barrier is executed inside |
1743 | this function. |
1744 | */ |
1745 | kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { |
1746 | int status; |
1747 | KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
1749 | |
1750 | if (!TCR_4(__kmp_init_parallel)) |
1751 | __kmp_parallel_initialize(); |
1752 | |
1753 | __kmp_resume_if_soft_paused(); |
1754 | |
1755 | if (__kmp_env_consistency_check) |
    __kmp_check_barrier(global_tid, ct_barrier, loc);
1757 | |
1758 | #if OMPT_SUPPORT |
1759 | ompt_frame_t *ompt_frame; |
1760 | if (ompt_enabled.enabled) { |
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1762 | if (ompt_frame->enter_frame.ptr == NULL) |
1763 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
1764 | } |
1765 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1766 | #endif |
1767 | #if USE_ITT_NOTIFY |
1768 | __kmp_threads[global_tid]->th.th_ident = loc; |
1769 | #endif |
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1771 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1772 | if (ompt_enabled.enabled) { |
1773 | ompt_frame->enter_frame = ompt_data_none; |
1774 | } |
1775 | #endif |
1776 | |
1777 | return (status != 0) ? 0 : 1; |
1778 | } |
1779 | |
1780 | /*! |
1781 | @ingroup SYNCHRONIZATION |
1782 | @param loc source location information |
1783 | @param global_tid thread id. |
1784 | |
1785 | Complete the execution of a combined barrier and master. This function should |
1786 | only be called at the completion of the <tt>master</tt> code. Other threads will |
1787 | still be waiting at the barrier and this call releases them. |
1788 | */ |
1789 | void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { |
1790 | KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1793 | } |
1794 | |
1795 | /*! |
1796 | @ingroup SYNCHRONIZATION |
1797 | @param loc source location information |
1798 | @param global_tid thread id. |
1799 | @return one if the thread should execute the master block, zero otherwise |
1800 | |
1801 | Start execution of a combined barrier and master(nowait) construct. |
1802 | The barrier is executed inside this function. |
There is no equivalent "end" function: the barrier completes before the
master test, so there is nothing left to release, and the consistency-check
bookkeeping of __kmpc_end_master is performed inside this function.
1804 | */ |
1805 | kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { |
1806 | kmp_int32 ret; |
1807 | KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
1809 | |
1810 | if (!TCR_4(__kmp_init_parallel)) |
1811 | __kmp_parallel_initialize(); |
1812 | |
1813 | __kmp_resume_if_soft_paused(); |
1814 | |
1815 | if (__kmp_env_consistency_check) { |
1816 | if (loc == 0) { |
1817 | KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? |
1818 | } |
    __kmp_check_barrier(global_tid, ct_barrier, loc);
1820 | } |
1821 | |
1822 | #if OMPT_SUPPORT |
1823 | ompt_frame_t *ompt_frame; |
1824 | if (ompt_enabled.enabled) { |
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1826 | if (ompt_frame->enter_frame.ptr == NULL) |
1827 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
1828 | } |
1829 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1830 | #endif |
1831 | #if USE_ITT_NOTIFY |
1832 | __kmp_threads[global_tid]->th.th_ident = loc; |
1833 | #endif |
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1835 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1836 | if (ompt_enabled.enabled) { |
1837 | ompt_frame->enter_frame = ompt_data_none; |
1838 | } |
1839 | #endif |
1840 | |
1841 | ret = __kmpc_master(loc, global_tid); |
1842 | |
1843 | if (__kmp_env_consistency_check) { |
1844 | /* there's no __kmpc_end_master called; so the (stats) */ |
1845 | /* actions of __kmpc_end_master are done here */ |
1846 | if (ret) { |
1847 | /* only one thread should do the pop since only */ |
1848 | /* one did the push (see __kmpc_master()) */ |
      __kmp_pop_sync(global_tid, ct_master, loc);
1850 | } |
1851 | } |
1852 | |
1853 | return (ret); |
1854 | } |
1855 | |
1856 | /* The BARRIER for a SINGLE process section is always explicit */ |
1857 | /*! |
1858 | @ingroup WORK_SHARING |
1859 | @param loc source location information |
1860 | @param global_tid global thread number |
1861 | @return One if this thread should execute the single construct, zero otherwise. |
1862 | |
1863 | Test whether to execute a <tt>single</tt> construct. |
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if one is required.
1866 | */ |
1867 | |
1868 | kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { |
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1871 | |
1872 | if (rc) { |
1873 | // We are going to execute the single statement, so we should count it. |
1874 | KMP_COUNT_BLOCK(OMP_SINGLE); |
1875 | KMP_PUSH_PARTITIONED_TIMER(OMP_single); |
1876 | } |
1877 | |
1878 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1879 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
1880 | kmp_team_t *team = this_thr->th.th_team; |
  int tid = __kmp_tid_from_gtid(global_tid);
1882 | |
1883 | if (ompt_enabled.enabled) { |
1884 | if (rc) { |
1885 | if (ompt_enabled.ompt_callback_work) { |
1886 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1887 | ompt_work_single_executor, ompt_scope_begin, |
1888 | &(team->t.ompt_team_info.parallel_data), |
1889 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
1890 | 1, OMPT_GET_RETURN_ADDRESS(0)); |
1891 | } |
1892 | } else { |
1893 | if (ompt_enabled.ompt_callback_work) { |
1894 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1895 | ompt_work_single_other, ompt_scope_begin, |
1896 | &(team->t.ompt_team_info.parallel_data), |
1897 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
1898 | 1, OMPT_GET_RETURN_ADDRESS(0)); |
1899 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1900 | ompt_work_single_other, ompt_scope_end, |
1901 | &(team->t.ompt_team_info.parallel_data), |
1902 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
1903 | 1, OMPT_GET_RETURN_ADDRESS(0)); |
1904 | } |
1905 | } |
1906 | } |
1907 | #endif |
1908 | |
1909 | return rc; |
1910 | } |
1911 | |
1912 | /*! |
1913 | @ingroup WORK_SHARING |
1914 | @param loc source location information |
1915 | @param global_tid global thread number |
1916 | |
1917 | Mark the end of a <tt>single</tt> construct. This function should |
1918 | only be called by the thread that executed the block of code protected |
1919 | by the `single` construct. |
1920 | */ |
1921 | void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
1924 | KMP_POP_PARTITIONED_TIMER(); |
1925 | |
1926 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1927 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
1928 | kmp_team_t *team = this_thr->th.th_team; |
  int tid = __kmp_tid_from_gtid(global_tid);
1930 | |
1931 | if (ompt_enabled.ompt_callback_work) { |
1932 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1933 | ompt_work_single_executor, ompt_scope_end, |
1934 | &(team->t.ompt_team_info.parallel_data), |
1935 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, |
1936 | OMPT_GET_RETURN_ADDRESS(0)); |
1937 | } |
1938 | #endif |
1939 | } |
1940 | |
1941 | /*! |
1942 | @ingroup WORK_SHARING |
1943 | @param loc Source location |
1944 | @param global_tid Global thread id |
1945 | |
1946 | Mark the end of a statically scheduled loop. |
1947 | */ |
1948 | void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { |
1949 | KMP_POP_PARTITIONED_TIMER(); |
1950 | KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n" , global_tid)); |
1951 | |
1952 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1953 | if (ompt_enabled.ompt_callback_work) { |
1954 | ompt_work_t ompt_work_type = ompt_work_loop; |
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1957 | // Determine workshare type |
1958 | if (loc != NULL) { |
1959 | if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { |
1960 | ompt_work_type = ompt_work_loop; |
1961 | } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { |
1962 | ompt_work_type = ompt_work_sections; |
1963 | } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { |
1964 | ompt_work_type = ompt_work_distribute; |
1965 | } else { |
1966 | // use default set above. |
1967 | // a warning about this case is provided in __kmpc_for_static_init |
1968 | } |
1969 | KMP_DEBUG_ASSERT(ompt_work_type); |
1970 | } |
1971 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1972 | ompt_work_type, ompt_scope_end, &(team_info->parallel_data), |
1973 | &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); |
1974 | } |
1975 | #endif |
1976 | if (__kmp_env_consistency_check) |
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
1978 | } |
1979 | |
1980 | // User routines which take C-style arguments (call by value) |
1981 | // different from the Fortran equivalent routines |
1982 | |
1983 | void ompc_set_num_threads(int arg) { |
1984 | // !!!!! TODO: check the per-task binding |
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1986 | } |
1987 | |
1988 | void ompc_set_dynamic(int flag) { |
1989 | kmp_info_t *thread; |
1990 | |
1991 | /* For the thread-private implementation of the internal controls */ |
1992 | thread = __kmp_entry_thread(); |
1993 | |
1994 | __kmp_save_internal_controls(thread); |
1995 | |
1996 | set__dynamic(thread, flag ? true : false); |
1997 | } |
1998 | |
1999 | void ompc_set_nested(int flag) { |
2000 | kmp_info_t *thread; |
2001 | |
2002 | /* For the thread-private internal controls implementation */ |
2003 | thread = __kmp_entry_thread(); |
2004 | |
2005 | __kmp_save_internal_controls(thread); |
2006 | |
2007 | set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); |
2008 | } |
2009 | |
2010 | void ompc_set_max_active_levels(int max_active_levels) { |
2011 | /* TO DO */ |
2012 | /* we want per-task implementation of this internal control */ |
2013 | |
2014 | /* For the per-thread internal controls implementation */ |
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2016 | } |
2017 | |
2018 | void ompc_set_schedule(omp_sched_t kind, int modifier) { |
2019 | // !!!!! TODO: check the per-task binding |
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2021 | } |
2022 | |
2023 | int ompc_get_ancestor_thread_num(int level) { |
2024 | return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); |
2025 | } |
2026 | |
2027 | int ompc_get_team_size(int level) { |
2028 | return __kmp_get_team_size(__kmp_entry_gtid(), level); |
2029 | } |
2030 | |
2031 | /* OpenMP 5.0 Affinity Format API */ |
2032 | void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { |
2033 | if (!__kmp_init_serial) { |
2034 | __kmp_serial_initialize(); |
2035 | } |
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
2038 | } |
2039 | |
2040 | size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { |
2041 | size_t format_size; |
2042 | if (!__kmp_init_serial) { |
2043 | __kmp_serial_initialize(); |
2044 | } |
  format_size = KMP_STRLEN(__kmp_affinity_format);
2046 | if (buffer && size) { |
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
2049 | } |
2050 | return format_size; |
2051 | } |
2052 | |
2053 | void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { |
2054 | int gtid; |
2055 | if (!TCR_4(__kmp_init_middle)) { |
2056 | __kmp_middle_initialize(); |
2057 | } |
2058 | __kmp_assign_root_init_mask(); |
2059 | gtid = __kmp_get_gtid(); |
2060 | #if KMP_AFFINITY_SUPPORTED |
2061 | if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && |
2062 | __kmp_affinity.flags.reset) { |
2063 | __kmp_reset_root_init_mask(gtid); |
2064 | } |
2065 | #endif |
2066 | __kmp_aux_display_affinity(gtid, format); |
2067 | } |
2068 | |
2069 | size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, |
2070 | char const *format) { |
2071 | int gtid; |
2072 | size_t num_required; |
2073 | kmp_str_buf_t capture_buf; |
2074 | if (!TCR_4(__kmp_init_middle)) { |
2075 | __kmp_middle_initialize(); |
2076 | } |
2077 | __kmp_assign_root_init_mask(); |
2078 | gtid = __kmp_get_gtid(); |
2079 | #if KMP_AFFINITY_SUPPORTED |
2080 | if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && |
2081 | __kmp_affinity.flags.reset) { |
2082 | __kmp_reset_root_init_mask(gtid); |
2083 | } |
2084 | #endif |
2085 | __kmp_str_buf_init(&capture_buf); |
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2087 | if (buffer && buf_size) { |
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
2090 | } |
  __kmp_str_buf_free(&capture_buf);
2092 | return num_required; |
2093 | } |
2094 | |
2095 | void kmpc_set_stacksize(int arg) { |
2096 | // __kmp_aux_set_stacksize initializes the library if needed |
2097 | __kmp_aux_set_stacksize(arg); |
2098 | } |
2099 | |
2100 | void kmpc_set_stacksize_s(size_t arg) { |
2101 | // __kmp_aux_set_stacksize initializes the library if needed |
2102 | __kmp_aux_set_stacksize(arg); |
2103 | } |
2104 | |
2105 | void kmpc_set_blocktime(int arg) { |
2106 | int gtid, tid, bt = arg; |
2107 | kmp_info_t *thread; |
2108 | |
2109 | gtid = __kmp_entry_gtid(); |
2110 | tid = __kmp_tid_from_gtid(gtid); |
2111 | thread = __kmp_thread_from_gtid(gtid); |
2112 | |
  __kmp_aux_convert_blocktime(&bt);
  __kmp_aux_set_blocktime(bt, thread, tid);
2115 | } |
2116 | |
2117 | void kmpc_set_library(int arg) { |
2118 | // __kmp_user_set_library initializes the library if needed |
  __kmp_user_set_library((enum library_type)arg);
2120 | } |
2121 | |
2122 | void kmpc_set_defaults(char const *str) { |
2123 | // __kmp_aux_set_defaults initializes the library if needed |
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2125 | } |
2126 | |
2127 | void kmpc_set_disp_num_buffers(int arg) { |
2128 | // ignore after initialization because some teams have already |
2129 | // allocated dispatch buffers |
2130 | if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && |
2131 | arg <= KMP_MAX_DISP_NUM_BUFF) { |
2132 | __kmp_dispatch_num_buffers = arg; |
2133 | } |
2134 | } |
2135 | |
2136 | int kmpc_set_affinity_mask_proc(int proc, void **mask) { |
2137 | #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED |
2138 | return -1; |
2139 | #else |
2140 | if (!TCR_4(__kmp_init_middle)) { |
2141 | __kmp_middle_initialize(); |
2142 | } |
2143 | __kmp_assign_root_init_mask(); |
2144 | return __kmp_aux_set_affinity_mask_proc(proc, mask); |
2145 | #endif |
2146 | } |
2147 | |
2148 | int kmpc_unset_affinity_mask_proc(int proc, void **mask) { |
2149 | #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED |
2150 | return -1; |
2151 | #else |
2152 | if (!TCR_4(__kmp_init_middle)) { |
2153 | __kmp_middle_initialize(); |
2154 | } |
2155 | __kmp_assign_root_init_mask(); |
2156 | return __kmp_aux_unset_affinity_mask_proc(proc, mask); |
2157 | #endif |
2158 | } |
2159 | |
2160 | int kmpc_get_affinity_mask_proc(int proc, void **mask) { |
2161 | #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED |
2162 | return -1; |
2163 | #else |
2164 | if (!TCR_4(__kmp_init_middle)) { |
2165 | __kmp_middle_initialize(); |
2166 | } |
2167 | __kmp_assign_root_init_mask(); |
2168 | return __kmp_aux_get_affinity_mask_proc(proc, mask); |
2169 | #endif |
2170 | } |
2171 | |
2172 | /* -------------------------------------------------------------------------- */ |
2173 | /*! |
2174 | @ingroup THREADPRIVATE |
2175 | @param loc source location information |
2176 | @param gtid global thread number |
2177 | @param cpy_size size of the cpy_data buffer |
2178 | @param cpy_data pointer to data to be copied |
2179 | @param cpy_func helper function to call for copying data |
2180 | @param didit flag variable: 1=single thread; 0=not single thread |
2181 | |
2182 | __kmpc_copyprivate implements the interface for the private data broadcast |
2183 | needed for the copyprivate clause associated with a single region in an |
2184 | OpenMP<sup>*</sup> program (both C and Fortran). |
2185 | All threads participating in the parallel region call this routine. |
2186 | One of the threads (called the single thread) should have the <tt>didit</tt> |
2187 | variable set to 1 and all other threads should have that variable set to 0. |
2188 | All threads pass a pointer to a data buffer (cpy_data) that they have built. |
2189 | |
2190 | The OpenMP specification forbids the use of nowait on the single region when a |
2191 | copyprivate clause is present. However, @ref __kmpc_copyprivate implements a |
2192 | barrier internally to avoid race conditions, so the code generation for the |
2193 | single region should avoid generating a barrier after the call to @ref |
2194 | __kmpc_copyprivate. |
2195 | |
2196 | The <tt>gtid</tt> parameter is the global thread id for the current thread. |
2197 | The <tt>loc</tt> parameter is a pointer to source location information. |
2198 | |
Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then each of the other threads
calls the function pointed to by the parameter cpy_func, which carries out the
copy using the saved address as the source and its own cpy_data buffer as the
destination.
2203 | |
2204 | The cpy_func routine used for the copy and the contents of the data area defined |
2205 | by cpy_data and cpy_size may be built in any fashion that will allow the copy |
2206 | to be done. For instance, the cpy_data buffer can hold the actual data to be |
2207 | copied or it may hold a list of pointers to the data. The cpy_func routine must |
2208 | interpret the cpy_data buffer appropriately. |
2209 | |
2210 | The interface to cpy_func is as follows: |
2211 | @code |
2212 | void cpy_func( void *destination, void *source ) |
2213 | @endcode |
2214 | where void *destination is the cpy_data pointer for the thread being copied to |
2215 | and void *source is the cpy_data pointer for the thread being copied from. |
2216 | */ |
2217 | void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, |
2218 | void *cpy_data, void (*cpy_func)(void *, void *), |
2219 | kmp_int32 didit) { |
2220 | void **data_ptr; |
2221 | KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n" , gtid)); |
2222 | __kmp_assert_valid_gtid(gtid); |
2223 | |
2224 | KMP_MB(); |
2225 | |
2226 | data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; |
2227 | |
2228 | if (__kmp_env_consistency_check) { |
2229 | if (loc == 0) { |
2230 | KMP_WARNING(ConstructIdentInvalid); |
2231 | } |
2232 | } |
2233 | |
2234 | // ToDo: Optimize the following two barriers into some kind of split barrier |
2235 | |
2236 | if (didit) |
2237 | *data_ptr = cpy_data; |
2238 | |
2239 | #if OMPT_SUPPORT |
2240 | ompt_frame_t *ompt_frame; |
2241 | if (ompt_enabled.enabled) { |
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2243 | if (ompt_frame->enter_frame.ptr == NULL) |
2244 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
2245 | } |
2246 | OMPT_STORE_RETURN_ADDRESS(gtid); |
2247 | #endif |
2248 | /* This barrier is not a barrier region boundary */ |
2249 | #if USE_ITT_NOTIFY |
2250 | __kmp_threads[gtid]->th.th_ident = loc; |
2251 | #endif |
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2253 | |
2254 | if (!didit) |
2255 | (*cpy_func)(cpy_data, *data_ptr); |
2256 | |
2257 | // Consider next barrier a user-visible barrier for barrier region boundaries |
2258 | // Nesting checks are already handled by the single construct checks |
2259 | { |
2260 | #if OMPT_SUPPORT |
2261 | OMPT_STORE_RETURN_ADDRESS(gtid); |
2262 | #endif |
2263 | #if USE_ITT_NOTIFY |
2264 | __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. |
2265 | // tasks can overwrite the location) |
2266 | #endif |
    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2268 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2269 | if (ompt_enabled.enabled) { |
2270 | ompt_frame->enter_frame = ompt_data_none; |
2271 | } |
2272 | #endif |
2273 | } |
2274 | } |
2275 | |
2276 | /* --------------------------------------------------------------------------*/ |
2277 | /*! |
2278 | @ingroup THREADPRIVATE |
2279 | @param loc source location information |
2280 | @param gtid global thread number |
2281 | @param cpy_data pointer to the data to be saved/copied or 0 |
2282 | @return the saved pointer to the data |
2283 | |
__kmpc_copyprivate_light is a lighter-weight version of __kmpc_copyprivate: it
only saves the pointer it is given (when that pointer is non-zero, i.e. when
called from the thread that executed the single block) and returns the saved
pointer to every caller (the single thread itself does not need it). It does
not copy any data; the copy has to be done elsewhere, e.g. inline in the
generated code. For that reason this function, unlike __kmpc_copyprivate, does
not end with a barrier after the copy, so the generated code must issue a
barrier once all threads have finished copying the data.
2292 | */ |
2293 | void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { |
2294 | void **data_ptr; |
2295 | |
2296 | KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n" , gtid)); |
2297 | |
2298 | KMP_MB(); |
2299 | |
2300 | data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; |
2301 | |
2302 | if (__kmp_env_consistency_check) { |
2303 | if (loc == 0) { |
2304 | KMP_WARNING(ConstructIdentInvalid); |
2305 | } |
2306 | } |
2307 | |
2308 | // ToDo: Optimize the following barrier |
2309 | |
2310 | if (cpy_data) |
2311 | *data_ptr = cpy_data; |
2312 | |
2313 | #if OMPT_SUPPORT |
2314 | ompt_frame_t *ompt_frame; |
2315 | if (ompt_enabled.enabled) { |
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2317 | if (ompt_frame->enter_frame.ptr == NULL) |
2318 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
2319 | OMPT_STORE_RETURN_ADDRESS(gtid); |
2320 | } |
2321 | #endif |
2322 | /* This barrier is not a barrier region boundary */ |
2323 | #if USE_ITT_NOTIFY |
2324 | __kmp_threads[gtid]->th.th_ident = loc; |
2325 | #endif |
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2327 | |
2328 | return *data_ptr; |
2329 | } |
2330 | |
2331 | /* -------------------------------------------------------------------------- */ |
2332 | |
2333 | #define INIT_LOCK __kmp_init_user_lock_with_checks |
2334 | #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks |
2335 | #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks |
2336 | #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed |
2337 | #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks |
2338 | #define ACQUIRE_NESTED_LOCK_TIMED \ |
2339 | __kmp_acquire_nested_user_lock_with_checks_timed |
2340 | #define RELEASE_LOCK __kmp_release_user_lock_with_checks |
2341 | #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks |
2342 | #define TEST_LOCK __kmp_test_user_lock_with_checks |
2343 | #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks |
2344 | #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks |
2345 | #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks |
2346 | |
2347 | // TODO: Make check abort messages use location info & pass it into |
2348 | // with_checks routines |
2349 | |
2350 | #if KMP_USE_DYNAMIC_LOCK |
2351 | |
2352 | // internal lock initializer |
2353 | static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, |
2354 | kmp_dyna_lockseq_t seq) { |
2355 | if (KMP_IS_D_LOCK(seq)) { |
2356 | KMP_INIT_D_LOCK(lock, seq); |
2357 | #if USE_ITT_BUILD |
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2359 | #endif |
2360 | } else { |
2361 | KMP_INIT_I_LOCK(lock, seq); |
2362 | #if USE_ITT_BUILD |
2363 | kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
    __kmp_itt_lock_creating(ilk->lock, loc);
2365 | #endif |
2366 | } |
2367 | } |
2368 | |
2369 | // internal nest lock initializer |
2370 | static __forceinline void |
2371 | __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, |
2372 | kmp_dyna_lockseq_t seq) { |
2373 | #if KMP_USE_TSX |
2374 | // Don't have nested lock implementation for speculative locks |
2375 | if (seq == lockseq_hle || seq == lockseq_rtm_queuing || |
2376 | seq == lockseq_rtm_spin || seq == lockseq_adaptive) |
2377 | seq = __kmp_user_lock_seq; |
2378 | #endif |
2379 | switch (seq) { |
2380 | case lockseq_tas: |
2381 | seq = lockseq_nested_tas; |
2382 | break; |
2383 | #if KMP_USE_FUTEX |
2384 | case lockseq_futex: |
2385 | seq = lockseq_nested_futex; |
2386 | break; |
2387 | #endif |
2388 | case lockseq_ticket: |
2389 | seq = lockseq_nested_ticket; |
2390 | break; |
2391 | case lockseq_queuing: |
2392 | seq = lockseq_nested_queuing; |
2393 | break; |
2394 | case lockseq_drdpa: |
2395 | seq = lockseq_nested_drdpa; |
2396 | break; |
2397 | default: |
2398 | seq = lockseq_nested_queuing; |
2399 | } |
2400 | KMP_INIT_I_LOCK(lock, seq); |
2401 | #if USE_ITT_BUILD |
2402 | kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
  __kmp_itt_lock_creating(ilk->lock, loc);
2404 | #endif |
2405 | } |
2406 | |
2407 | /* initialize the lock with a hint */ |
2408 | void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, |
2409 | uintptr_t hint) { |
2410 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2411 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2412 | KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint" ); |
2413 | } |
2414 | |
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2416 | |
2417 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2418 | // This is the case, if called from omp_init_lock_with_hint: |
2419 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2420 | if (!codeptr) |
2421 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2422 | if (ompt_enabled.ompt_callback_lock_init) { |
2423 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2424 | ompt_mutex_lock, (omp_lock_hint_t)hint, |
2425 | __ompt_get_mutex_impl_type(user_lock), |
2426 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2427 | } |
2428 | #endif |
2429 | } |
2430 | |
2431 | /* initialize the lock with a hint */ |
2432 | void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, |
2433 | void **user_lock, uintptr_t hint) { |
2434 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2435 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2436 | KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint" ); |
2437 | } |
2438 | |
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2440 | |
2441 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2442 | // This is the case, if called from omp_init_lock_with_hint: |
2443 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2444 | if (!codeptr) |
2445 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2446 | if (ompt_enabled.ompt_callback_lock_init) { |
2447 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2448 | ompt_mutex_nest_lock, (omp_lock_hint_t)hint, |
2449 | __ompt_get_mutex_impl_type(user_lock), |
2450 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2451 | } |
2452 | #endif |
2453 | } |
2454 | |
2455 | #endif // KMP_USE_DYNAMIC_LOCK |
2456 | |
2457 | /* initialize the lock */ |
2458 | void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2459 | #if KMP_USE_DYNAMIC_LOCK |
2460 | |
2461 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2462 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2463 | KMP_FATAL(LockIsUninitialized, "omp_init_lock" ); |
2464 | } |
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2466 | |
2467 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2468 | // This is the case, if called from omp_init_lock_with_hint: |
2469 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2470 | if (!codeptr) |
2471 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2472 | if (ompt_enabled.ompt_callback_lock_init) { |
2473 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2474 | ompt_mutex_lock, omp_lock_hint_none, |
2475 | __ompt_get_mutex_impl_type(user_lock), |
2476 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2477 | } |
2478 | #endif |
2479 | |
2480 | #else // KMP_USE_DYNAMIC_LOCK |
2481 | |
2482 | static char const *const func = "omp_init_lock" ; |
2483 | kmp_user_lock_p lck; |
2484 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2485 | |
2486 | if (__kmp_env_consistency_check) { |
2487 | if (user_lock == NULL) { |
2488 | KMP_FATAL(LockIsUninitialized, func); |
2489 | } |
2490 | } |
2491 | |
2492 | KMP_CHECK_USER_LOCK_INIT(); |
2493 | |
2494 | if ((__kmp_user_lock_kind == lk_tas) && |
2495 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2496 | lck = (kmp_user_lock_p)user_lock; |
2497 | } |
2498 | #if KMP_USE_FUTEX |
2499 | else if ((__kmp_user_lock_kind == lk_futex) && |
2500 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2501 | lck = (kmp_user_lock_p)user_lock; |
2502 | } |
2503 | #endif |
2504 | else { |
2505 | lck = __kmp_user_lock_allocate(user_lock, gtid, 0); |
2506 | } |
2507 | INIT_LOCK(lck); |
2508 | __kmp_set_user_lock_location(lck, loc); |
2509 | |
2510 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2511 | // This is the case, if called from omp_init_lock_with_hint: |
2512 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2513 | if (!codeptr) |
2514 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2515 | if (ompt_enabled.ompt_callback_lock_init) { |
2516 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2517 | ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
2518 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2519 | } |
2520 | #endif |
2521 | |
2522 | #if USE_ITT_BUILD |
2523 | __kmp_itt_lock_creating(lck); |
2524 | #endif /* USE_ITT_BUILD */ |
2525 | |
2526 | #endif // KMP_USE_DYNAMIC_LOCK |
2527 | } // __kmpc_init_lock |
2528 | |
2529 | /* initialize the lock */ |
2530 | void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2531 | #if KMP_USE_DYNAMIC_LOCK |
2532 | |
2533 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2534 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2535 | KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock" ); |
2536 | } |
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2538 | |
2539 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2540 | // This is the case, if called from omp_init_lock_with_hint: |
2541 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2542 | if (!codeptr) |
2543 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2544 | if (ompt_enabled.ompt_callback_lock_init) { |
2545 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2546 | ompt_mutex_nest_lock, omp_lock_hint_none, |
2547 | __ompt_get_mutex_impl_type(user_lock), |
2548 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2549 | } |
2550 | #endif |
2551 | |
2552 | #else // KMP_USE_DYNAMIC_LOCK |
2553 | |
2554 | static char const *const func = "omp_init_nest_lock" ; |
2555 | kmp_user_lock_p lck; |
2556 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2557 | |
2558 | if (__kmp_env_consistency_check) { |
2559 | if (user_lock == NULL) { |
2560 | KMP_FATAL(LockIsUninitialized, func); |
2561 | } |
2562 | } |
2563 | |
2564 | KMP_CHECK_USER_LOCK_INIT(); |
2565 | |
2566 | if ((__kmp_user_lock_kind == lk_tas) && |
2567 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2568 | OMP_NEST_LOCK_T_SIZE)) { |
2569 | lck = (kmp_user_lock_p)user_lock; |
2570 | } |
2571 | #if KMP_USE_FUTEX |
2572 | else if ((__kmp_user_lock_kind == lk_futex) && |
2573 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2574 | OMP_NEST_LOCK_T_SIZE)) { |
2575 | lck = (kmp_user_lock_p)user_lock; |
2576 | } |
2577 | #endif |
2578 | else { |
2579 | lck = __kmp_user_lock_allocate(user_lock, gtid, 0); |
2580 | } |
2581 | |
2582 | INIT_NESTED_LOCK(lck); |
2583 | __kmp_set_user_lock_location(lck, loc); |
2584 | |
2585 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2586 | // This is the case, if called from omp_init_lock_with_hint: |
2587 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2588 | if (!codeptr) |
2589 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2590 | if (ompt_enabled.ompt_callback_lock_init) { |
2591 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2592 | ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
2593 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2594 | } |
2595 | #endif |
2596 | |
2597 | #if USE_ITT_BUILD |
2598 | __kmp_itt_lock_creating(lck); |
2599 | #endif /* USE_ITT_BUILD */ |
2600 | |
2601 | #endif // KMP_USE_DYNAMIC_LOCK |
2602 | } // __kmpc_init_nest_lock |
2603 | |
2604 | void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2605 | #if KMP_USE_DYNAMIC_LOCK |
2606 | |
2607 | #if USE_ITT_BUILD |
2608 | kmp_user_lock_p lck; |
2609 | if (KMP_EXTRACT_D_TAG(user_lock) == 0) { |
2610 | lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; |
2611 | } else { |
2612 | lck = (kmp_user_lock_p)user_lock; |
2613 | } |
  __kmp_itt_lock_destroyed(lck);
2615 | #endif |
2616 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2617 | // This is the case, if called from omp_init_lock_with_hint: |
2618 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2619 | if (!codeptr) |
2620 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2621 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2622 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2623 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2624 | } |
2625 | #endif |
2626 | KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); |
2627 | #else |
2628 | kmp_user_lock_p lck; |
2629 | |
2630 | if ((__kmp_user_lock_kind == lk_tas) && |
2631 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2632 | lck = (kmp_user_lock_p)user_lock; |
2633 | } |
2634 | #if KMP_USE_FUTEX |
2635 | else if ((__kmp_user_lock_kind == lk_futex) && |
2636 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2637 | lck = (kmp_user_lock_p)user_lock; |
2638 | } |
2639 | #endif |
2640 | else { |
2641 | lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock" ); |
2642 | } |
2643 | |
2644 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2645 | // This is the case, if called from omp_init_lock_with_hint: |
2646 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2647 | if (!codeptr) |
2648 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2649 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2650 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2651 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2652 | } |
2653 | #endif |
2654 | |
2655 | #if USE_ITT_BUILD |
2656 | __kmp_itt_lock_destroyed(lck); |
2657 | #endif /* USE_ITT_BUILD */ |
2658 | DESTROY_LOCK(lck); |
2659 | |
2660 | if ((__kmp_user_lock_kind == lk_tas) && |
2661 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2662 | ; |
2663 | } |
2664 | #if KMP_USE_FUTEX |
2665 | else if ((__kmp_user_lock_kind == lk_futex) && |
2666 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2667 | ; |
2668 | } |
2669 | #endif |
2670 | else { |
2671 | __kmp_user_lock_free(user_lock, gtid, lck); |
2672 | } |
2673 | #endif // KMP_USE_DYNAMIC_LOCK |
2674 | } // __kmpc_destroy_lock |
2675 | |
2676 | /* destroy the lock */ |
2677 | void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2678 | #if KMP_USE_DYNAMIC_LOCK |
2679 | |
2680 | #if USE_ITT_BUILD |
2681 | kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); |
  __kmp_itt_lock_destroyed(ilk->lock);
2683 | #endif |
2684 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2685 | // This is the case, if called from omp_init_lock_with_hint: |
2686 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2687 | if (!codeptr) |
2688 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2689 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2690 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2691 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2692 | } |
2693 | #endif |
2694 | KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); |
2695 | |
2696 | #else // KMP_USE_DYNAMIC_LOCK |
2697 | |
2698 | kmp_user_lock_p lck; |
2699 | |
2700 | if ((__kmp_user_lock_kind == lk_tas) && |
2701 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2702 | OMP_NEST_LOCK_T_SIZE)) { |
2703 | lck = (kmp_user_lock_p)user_lock; |
2704 | } |
2705 | #if KMP_USE_FUTEX |
2706 | else if ((__kmp_user_lock_kind == lk_futex) && |
2707 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2708 | OMP_NEST_LOCK_T_SIZE)) { |
2709 | lck = (kmp_user_lock_p)user_lock; |
2710 | } |
2711 | #endif |
2712 | else { |
2713 | lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock" ); |
2714 | } |
2715 | |
2716 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2717 | // This is the case, if called from omp_init_lock_with_hint: |
2718 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2719 | if (!codeptr) |
2720 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2721 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2722 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2723 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2724 | } |
2725 | #endif |
2726 | |
2727 | #if USE_ITT_BUILD |
2728 | __kmp_itt_lock_destroyed(lck); |
2729 | #endif /* USE_ITT_BUILD */ |
2730 | |
2731 | DESTROY_NESTED_LOCK(lck); |
2732 | |
2733 | if ((__kmp_user_lock_kind == lk_tas) && |
2734 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2735 | OMP_NEST_LOCK_T_SIZE)) { |
2736 | ; |
2737 | } |
2738 | #if KMP_USE_FUTEX |
2739 | else if ((__kmp_user_lock_kind == lk_futex) && |
2740 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2741 | OMP_NEST_LOCK_T_SIZE)) { |
2742 | ; |
2743 | } |
2744 | #endif |
2745 | else { |
2746 | __kmp_user_lock_free(user_lock, gtid, lck); |
2747 | } |
2748 | #endif // KMP_USE_DYNAMIC_LOCK |
2749 | } // __kmpc_destroy_nest_lock |
2750 | |
2751 | void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2752 | KMP_COUNT_BLOCK(OMP_set_lock); |
2753 | #if KMP_USE_DYNAMIC_LOCK |
2754 | int tag = KMP_EXTRACT_D_TAG(user_lock); |
2755 | #if USE_ITT_BUILD |
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
2759 | #endif |
2760 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2761 | // This is the case, if called from omp_init_lock_with_hint: |
2762 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2763 | if (!codeptr) |
2764 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2765 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2766 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2767 | ompt_mutex_lock, omp_lock_hint_none, |
2768 | __ompt_get_mutex_impl_type(user_lock), |
2769 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2770 | } |
2771 | #endif |
2772 | #if KMP_USE_INLINED_TAS |
2773 | if (tag == locktag_tas && !__kmp_env_consistency_check) { |
2774 | KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); |
2775 | } else |
2776 | #elif KMP_USE_INLINED_FUTEX |
2777 | if (tag == locktag_futex && !__kmp_env_consistency_check) { |
2778 | KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); |
2779 | } else |
2780 | #endif |
2781 | { |
2782 | __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); |
2783 | } |
2784 | #if USE_ITT_BUILD |
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2786 | #endif |
2787 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2788 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2789 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2790 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2791 | } |
2792 | #endif |
2793 | |
2794 | #else // KMP_USE_DYNAMIC_LOCK |
2795 | |
2796 | kmp_user_lock_p lck; |
2797 | |
2798 | if ((__kmp_user_lock_kind == lk_tas) && |
2799 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2800 | lck = (kmp_user_lock_p)user_lock; |
2801 | } |
2802 | #if KMP_USE_FUTEX |
2803 | else if ((__kmp_user_lock_kind == lk_futex) && |
2804 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2805 | lck = (kmp_user_lock_p)user_lock; |
2806 | } |
2807 | #endif |
2808 | else { |
2809 | lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock" ); |
2810 | } |
2811 | |
2812 | #if USE_ITT_BUILD |
2813 | __kmp_itt_lock_acquiring(lck); |
2814 | #endif /* USE_ITT_BUILD */ |
2815 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2816 | // This is the case, if called from omp_init_lock_with_hint: |
2817 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2818 | if (!codeptr) |
2819 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2820 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2821 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2822 | ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
2823 | (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
2824 | } |
2825 | #endif |
2826 | |
2827 | ACQUIRE_LOCK(lck, gtid); |
2828 | |
2829 | #if USE_ITT_BUILD |
2830 | __kmp_itt_lock_acquired(lck); |
2831 | #endif /* USE_ITT_BUILD */ |
2832 | |
2833 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2834 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2835 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2836 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
2837 | } |
2838 | #endif |
2839 | |
2840 | #endif // KMP_USE_DYNAMIC_LOCK |
2841 | } |
2842 | |
2843 | void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2844 | #if KMP_USE_DYNAMIC_LOCK |
2845 | |
2846 | #if USE_ITT_BUILD |
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2848 | #endif |
2849 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2850 | // This is the case, if called from omp_init_lock_with_hint: |
2851 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2852 | if (!codeptr) |
2853 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2854 | if (ompt_enabled.enabled) { |
2855 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2856 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2857 | ompt_mutex_nest_lock, omp_lock_hint_none, |
2858 | __ompt_get_mutex_impl_type(user_lock), |
2859 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2860 | } |
2861 | } |
2862 | #endif |
2863 | int acquire_status = |
2864 | KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); |
2865 | (void)acquire_status; |
2866 | #if USE_ITT_BUILD |
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2868 | #endif |
2869 | |
2870 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2871 | if (ompt_enabled.enabled) { |
2872 | if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { |
2873 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2874 | // lock_first |
2875 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2876 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, |
2877 | codeptr); |
2878 | } |
2879 | } else { |
2880 | if (ompt_enabled.ompt_callback_nest_lock) { |
2881 | // lock_next |
2882 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
2883 | ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2884 | } |
2885 | } |
2886 | } |
2887 | #endif |
2888 | |
2889 | #else // KMP_USE_DYNAMIC_LOCK |
2890 | int acquire_status; |
2891 | kmp_user_lock_p lck; |
2892 | |
2893 | if ((__kmp_user_lock_kind == lk_tas) && |
2894 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2895 | OMP_NEST_LOCK_T_SIZE)) { |
2896 | lck = (kmp_user_lock_p)user_lock; |
2897 | } |
2898 | #if KMP_USE_FUTEX |
2899 | else if ((__kmp_user_lock_kind == lk_futex) && |
2900 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2901 | OMP_NEST_LOCK_T_SIZE)) { |
2902 | lck = (kmp_user_lock_p)user_lock; |
2903 | } |
2904 | #endif |
2905 | else { |
2906 | lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock" ); |
2907 | } |
2908 | |
2909 | #if USE_ITT_BUILD |
2910 | __kmp_itt_lock_acquiring(lck); |
2911 | #endif /* USE_ITT_BUILD */ |
2912 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2913 | // This is the case, if called from omp_init_lock_with_hint: |
2914 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2915 | if (!codeptr) |
2916 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2917 | if (ompt_enabled.enabled) { |
2918 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2919 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2920 | ompt_mutex_nest_lock, omp_lock_hint_none, |
2921 | __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, |
2922 | codeptr); |
2923 | } |
2924 | } |
2925 | #endif |
2926 | |
2927 | ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); |
2928 | |
2929 | #if USE_ITT_BUILD |
2930 | __kmp_itt_lock_acquired(lck); |
2931 | #endif /* USE_ITT_BUILD */ |
2932 | |
2933 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2934 | if (ompt_enabled.enabled) { |
2935 | if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { |
2936 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2937 | // lock_first |
2938 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2939 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
2940 | } |
2941 | } else { |
2942 | if (ompt_enabled.ompt_callback_nest_lock) { |
2943 | // lock_next |
2944 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
2945 | ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
2946 | } |
2947 | } |
2948 | } |
2949 | #endif |
2950 | |
2951 | #endif // KMP_USE_DYNAMIC_LOCK |
2952 | } |
2953 | |
2954 | void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2955 | #if KMP_USE_DYNAMIC_LOCK |
2956 | |
2957 | int tag = KMP_EXTRACT_D_TAG(user_lock); |
2958 | #if USE_ITT_BUILD |
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2960 | #endif |
2961 | #if KMP_USE_INLINED_TAS |
2962 | if (tag == locktag_tas && !__kmp_env_consistency_check) { |
2963 | KMP_RELEASE_TAS_LOCK(user_lock, gtid); |
2964 | } else |
2965 | #elif KMP_USE_INLINED_FUTEX |
2966 | if (tag == locktag_futex && !__kmp_env_consistency_check) { |
2967 | KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); |
2968 | } else |
2969 | #endif |
2970 | { |
2971 | __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); |
2972 | } |
2973 | |
2974 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2975 | // This is the case, if called from omp_init_lock_with_hint: |
2976 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2977 | if (!codeptr) |
2978 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2979 | if (ompt_enabled.ompt_callback_mutex_released) { |
2980 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
2981 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2982 | } |
2983 | #endif |
2984 | |
2985 | #else // KMP_USE_DYNAMIC_LOCK |
2986 | |
2987 | kmp_user_lock_p lck; |
2988 | |
2989 | /* Can't use serial interval since not block structured */ |
2990 | /* release the lock */ |
2991 | |
2992 | if ((__kmp_user_lock_kind == lk_tas) && |
2993 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2994 | #if KMP_OS_LINUX && \ |
2995 | (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) |
2996 | // "fast" path implemented to fix customer performance issue |
2997 | #if USE_ITT_BUILD |
2998 | __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); |
2999 | #endif /* USE_ITT_BUILD */ |
3000 | TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); |
3001 | KMP_MB(); |
3002 | |
3003 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3004 | // This is the case, if called from omp_init_lock_with_hint: |
3005 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3006 | if (!codeptr) |
3007 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3008 | if (ompt_enabled.ompt_callback_mutex_released) { |
3009 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3010 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3011 | } |
3012 | #endif |
3013 | |
3014 | return; |
3015 | #else |
3016 | lck = (kmp_user_lock_p)user_lock; |
3017 | #endif |
3018 | } |
3019 | #if KMP_USE_FUTEX |
3020 | else if ((__kmp_user_lock_kind == lk_futex) && |
3021 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
3022 | lck = (kmp_user_lock_p)user_lock; |
3023 | } |
3024 | #endif |
3025 | else { |
3026 | lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock" ); |
3027 | } |
3028 | |
3029 | #if USE_ITT_BUILD |
3030 | __kmp_itt_lock_releasing(lck); |
3031 | #endif /* USE_ITT_BUILD */ |
3032 | |
3033 | RELEASE_LOCK(lck, gtid); |
3034 | |
3035 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3036 | // This is the case, if called from omp_init_lock_with_hint: |
3037 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3038 | if (!codeptr) |
3039 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3040 | if (ompt_enabled.ompt_callback_mutex_released) { |
3041 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3042 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3043 | } |
3044 | #endif |
3045 | |
3046 | #endif // KMP_USE_DYNAMIC_LOCK |
3047 | } |
3048 | |
3049 | /* release the lock */ |
3050 | void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
3051 | #if KMP_USE_DYNAMIC_LOCK |
3052 | |
3053 | #if USE_ITT_BUILD |
3054 | __kmp_itt_lock_releasing(lock: (kmp_user_lock_p)user_lock); |
3055 | #endif |
3056 | int release_status = |
3057 | KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); |
3058 | (void)release_status; |
3059 | |
3060 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3061 | // This is the case, if called from omp_init_lock_with_hint: |
3062 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3063 | if (!codeptr) |
3064 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3065 | if (ompt_enabled.enabled) { |
3066 | if (release_status == KMP_LOCK_RELEASED) { |
3067 | if (ompt_enabled.ompt_callback_mutex_released) { |
3068 | // release_lock_last |
3069 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3070 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, |
3071 | codeptr); |
3072 | } |
3073 | } else if (ompt_enabled.ompt_callback_nest_lock) { |
3074 | // release_lock_prev |
3075 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
3076 | ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3077 | } |
3078 | } |
3079 | #endif |
3080 | |
3081 | #else // KMP_USE_DYNAMIC_LOCK |
3082 | |
3083 | kmp_user_lock_p lck; |
3084 | |
3085 | /* Can't use serial interval since not block structured */ |
3086 | |
3087 | if ((__kmp_user_lock_kind == lk_tas) && |
3088 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
3089 | OMP_NEST_LOCK_T_SIZE)) { |
3090 | #if KMP_OS_LINUX && \ |
3091 | (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) |
3092 | // "fast" path implemented to fix customer performance issue |
3093 | kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; |
3094 | #if USE_ITT_BUILD |
3095 | __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); |
3096 | #endif /* USE_ITT_BUILD */ |
3097 | |
3098 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3099 | int release_status = KMP_LOCK_STILL_HELD; |
3100 | #endif |
3101 | |
3102 | if (--(tl->lk.depth_locked) == 0) { |
3103 | TCW_4(tl->lk.poll, 0); |
3104 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3105 | release_status = KMP_LOCK_RELEASED; |
3106 | #endif |
3107 | } |
3108 | KMP_MB(); |
3109 | |
3110 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3111 | // This is the case, if called from omp_init_lock_with_hint: |
3112 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3113 | if (!codeptr) |
3114 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3115 | if (ompt_enabled.enabled) { |
3116 | if (release_status == KMP_LOCK_RELEASED) { |
3117 | if (ompt_enabled.ompt_callback_mutex_released) { |
3118 | // release_lock_last |
3119 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3120 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3121 | } |
3122 | } else if (ompt_enabled.ompt_callback_nest_lock) { |
3123 | // release_lock_previous |
3124 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3126 | } |
3127 | } |
3128 | #endif |
3129 | |
3130 | return; |
3131 | #else |
3132 | lck = (kmp_user_lock_p)user_lock; |
3133 | #endif |
3134 | } |
3135 | #if KMP_USE_FUTEX |
3136 | else if ((__kmp_user_lock_kind == lk_futex) && |
3137 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
3138 | OMP_NEST_LOCK_T_SIZE)) { |
3139 | lck = (kmp_user_lock_p)user_lock; |
3140 | } |
3141 | #endif |
3142 | else { |
3143 | lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock" ); |
3144 | } |
3145 | |
3146 | #if USE_ITT_BUILD |
3147 | __kmp_itt_lock_releasing(lck); |
3148 | #endif /* USE_ITT_BUILD */ |
3149 | |
3150 | int release_status; |
3151 | release_status = RELEASE_NESTED_LOCK(lck, gtid); |
3152 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3153 | // This is the case, if called from omp_init_lock_with_hint: |
3154 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3155 | if (!codeptr) |
3156 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3157 | if (ompt_enabled.enabled) { |
3158 | if (release_status == KMP_LOCK_RELEASED) { |
3159 | if (ompt_enabled.ompt_callback_mutex_released) { |
3160 | // release_lock_last |
3161 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3162 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3163 | } |
3164 | } else if (ompt_enabled.ompt_callback_nest_lock) { |
3165 | // release_lock_previous |
3166 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3168 | } |
3169 | } |
3170 | #endif |
3171 | |
3172 | #endif // KMP_USE_DYNAMIC_LOCK |
3173 | } |
3174 | |
3175 | /* try to acquire the lock */ |
3176 | int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
3177 | KMP_COUNT_BLOCK(OMP_test_lock); |
3178 | |
3179 | #if KMP_USE_DYNAMIC_LOCK |
3180 | int rc; |
3181 | int tag = KMP_EXTRACT_D_TAG(user_lock); |
3182 | #if USE_ITT_BUILD |
3183 | __kmp_itt_lock_acquiring(lock: (kmp_user_lock_p)user_lock); |
3184 | #endif |
3185 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3186 | // This is the case, if called from omp_init_lock_with_hint: |
3187 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3188 | if (!codeptr) |
3189 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3190 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
3191 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3192 | ompt_mutex_test_lock, omp_lock_hint_none, |
3193 | __ompt_get_mutex_impl_type(user_lock), |
3194 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3195 | } |
3196 | #endif |
3197 | #if KMP_USE_INLINED_TAS |
3198 | if (tag == locktag_tas && !__kmp_env_consistency_check) { |
3199 | KMP_TEST_TAS_LOCK(user_lock, gtid, rc); |
3200 | } else |
3201 | #elif KMP_USE_INLINED_FUTEX |
3202 | if (tag == locktag_futex && !__kmp_env_consistency_check) { |
3203 | KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); |
3204 | } else |
3205 | #endif |
3206 | { |
3207 | rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); |
3208 | } |
3209 | if (rc) { |
3210 | #if USE_ITT_BUILD |
3211 | __kmp_itt_lock_acquired(lock: (kmp_user_lock_p)user_lock); |
3212 | #endif |
3213 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3214 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
3215 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3216 | ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3217 | } |
3218 | #endif |
3219 | return FTN_TRUE; |
3220 | } else { |
3221 | #if USE_ITT_BUILD |
3222 | __kmp_itt_lock_cancelled(lock: (kmp_user_lock_p)user_lock); |
3223 | #endif |
3224 | return FTN_FALSE; |
3225 | } |
3226 | |
3227 | #else // KMP_USE_DYNAMIC_LOCK |
3228 | |
3229 | kmp_user_lock_p lck; |
3230 | int rc; |
3231 | |
3232 | if ((__kmp_user_lock_kind == lk_tas) && |
3233 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
3234 | lck = (kmp_user_lock_p)user_lock; |
3235 | } |
3236 | #if KMP_USE_FUTEX |
3237 | else if ((__kmp_user_lock_kind == lk_futex) && |
3238 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
3239 | lck = (kmp_user_lock_p)user_lock; |
3240 | } |
3241 | #endif |
3242 | else { |
3243 | lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock" ); |
3244 | } |
3245 | |
3246 | #if USE_ITT_BUILD |
3247 | __kmp_itt_lock_acquiring(lck); |
3248 | #endif /* USE_ITT_BUILD */ |
3249 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3250 | // This is the case, if called from omp_init_lock_with_hint: |
3251 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3252 | if (!codeptr) |
3253 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3254 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
3255 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3256 | ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
3257 | (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3258 | } |
3259 | #endif |
3260 | |
3261 | rc = TEST_LOCK(lck, gtid); |
3262 | #if USE_ITT_BUILD |
3263 | if (rc) { |
3264 | __kmp_itt_lock_acquired(lck); |
3265 | } else { |
3266 | __kmp_itt_lock_cancelled(lck); |
3267 | } |
3268 | #endif /* USE_ITT_BUILD */ |
3269 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3270 | if (rc && ompt_enabled.ompt_callback_mutex_acquired) { |
3271 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3272 | ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3273 | } |
3274 | #endif |
3275 | |
3276 | return (rc ? FTN_TRUE : FTN_FALSE); |
3277 | |
3278 | /* Can't use serial interval since not block structured */ |
3279 | |
3280 | #endif // KMP_USE_DYNAMIC_LOCK |
3281 | } |
3282 | |
3283 | /* try to acquire the lock */ |
3284 | int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
3285 | #if KMP_USE_DYNAMIC_LOCK |
3286 | int rc; |
3287 | #if USE_ITT_BUILD |
3288 | __kmp_itt_lock_acquiring(lock: (kmp_user_lock_p)user_lock); |
3289 | #endif |
3290 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3291 | // This is the case, if called from omp_init_lock_with_hint: |
3292 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3293 | if (!codeptr) |
3294 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3295 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
3296 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3297 | ompt_mutex_test_nest_lock, omp_lock_hint_none, |
3298 | __ompt_get_mutex_impl_type(user_lock), |
3299 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3300 | } |
3301 | #endif |
3302 | rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); |
3303 | #if USE_ITT_BUILD |
3304 | if (rc) { |
3305 | __kmp_itt_lock_acquired(lock: (kmp_user_lock_p)user_lock); |
3306 | } else { |
3307 | __kmp_itt_lock_cancelled(lock: (kmp_user_lock_p)user_lock); |
3308 | } |
3309 | #endif |
3310 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3311 | if (ompt_enabled.enabled && rc) { |
3312 | if (rc == 1) { |
3313 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
3314 | // lock_first |
3315 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3316 | ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, |
3317 | codeptr); |
3318 | } |
3319 | } else { |
3320 | if (ompt_enabled.ompt_callback_nest_lock) { |
3321 | // lock_next |
3322 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
3323 | ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3324 | } |
3325 | } |
3326 | } |
3327 | #endif |
3328 | return rc; |
3329 | |
3330 | #else // KMP_USE_DYNAMIC_LOCK |
3331 | |
3332 | kmp_user_lock_p lck; |
3333 | int rc; |
3334 | |
3335 | if ((__kmp_user_lock_kind == lk_tas) && |
3336 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
3337 | OMP_NEST_LOCK_T_SIZE)) { |
3338 | lck = (kmp_user_lock_p)user_lock; |
3339 | } |
3340 | #if KMP_USE_FUTEX |
3341 | else if ((__kmp_user_lock_kind == lk_futex) && |
3342 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
3343 | OMP_NEST_LOCK_T_SIZE)) { |
3344 | lck = (kmp_user_lock_p)user_lock; |
3345 | } |
3346 | #endif |
3347 | else { |
3348 | lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock" ); |
3349 | } |
3350 | |
3351 | #if USE_ITT_BUILD |
3352 | __kmp_itt_lock_acquiring(lck); |
3353 | #endif /* USE_ITT_BUILD */ |
3354 | |
3355 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3356 | // This is the case, if called from omp_init_lock_with_hint: |
3357 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3358 | if (!codeptr) |
3359 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
3362 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3363 | ompt_mutex_test_nest_lock, omp_lock_hint_none, |
3364 | __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, |
3365 | codeptr); |
3366 | } |
3367 | #endif |
3368 | |
3369 | rc = TEST_NESTED_LOCK(lck, gtid); |
3370 | #if USE_ITT_BUILD |
3371 | if (rc) { |
3372 | __kmp_itt_lock_acquired(lck); |
3373 | } else { |
3374 | __kmp_itt_lock_cancelled(lck); |
3375 | } |
3376 | #endif /* USE_ITT_BUILD */ |
3377 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3378 | if (ompt_enabled.enabled && rc) { |
3379 | if (rc == 1) { |
3380 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
3381 | // lock_first |
3382 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3383 | ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3384 | } |
3385 | } else { |
3386 | if (ompt_enabled.ompt_callback_nest_lock) { |
3387 | // lock_next |
3388 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3390 | } |
3391 | } |
3392 | } |
3393 | #endif |
3394 | return rc; |
3395 | |
3396 | /* Can't use serial interval since not block structured */ |
3397 | |
3398 | #endif // KMP_USE_DYNAMIC_LOCK |
3399 | } |
3400 | |
3401 | // Interface to fast scalable reduce methods routines |
3402 | |
3403 | // keep the selected method in a thread local structure for cross-function |
3404 | // usage: will be used in __kmpc_end_reduce* functions; |
3405 | // another solution: to re-determine the method one more time in |
3406 | // __kmpc_end_reduce* functions (new prototype required then) |
3407 | // AT: which solution is better? |
3408 | #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ |
3409 | ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) |
3410 | |
3411 | #define __KMP_GET_REDUCTION_METHOD(gtid) \ |
3412 | (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) |
3413 | |
3414 | // description of the packed_reduction_method variable: look at the macros in |
3415 | // kmp.h |
3416 | |
3417 | // used in a critical section reduce block |
3418 | static __forceinline void |
3419 | __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, |
3420 | kmp_critical_name *crit) { |
3421 | |
3422 | // this lock was visible to a customer and to the threading profile tool as a |
3423 | // serial overhead span (although it's used for an internal purpose only) |
3424 | // why was it visible in previous implementation? |
3425 | // should we keep it visible in new reduce block? |
3426 | kmp_user_lock_p lck; |
3427 | |
3428 | #if KMP_USE_DYNAMIC_LOCK |
3429 | |
3430 | kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; |
3431 | // Check if it is initialized. |
3432 | if (*lk == 0) { |
3433 | if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { |
3434 | KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, |
3435 | KMP_GET_D_TAG(__kmp_user_lock_seq)); |
3436 | } else { |
3437 | __kmp_init_indirect_csptr(crit, loc, gtid: global_tid, |
3438 | KMP_GET_I_TAG(__kmp_user_lock_seq)); |
3439 | } |
3440 | } |
3441 | // Branch for accessing the actual lock object and set operation. This |
3442 | // branching is inevitable since this lock initialization does not follow the |
3443 | // normal dispatch path (lock table is not used). |
3444 | if (KMP_EXTRACT_D_TAG(lk) != 0) { |
3445 | lck = (kmp_user_lock_p)lk; |
3446 | KMP_DEBUG_ASSERT(lck != NULL); |
3447 | if (__kmp_env_consistency_check) { |
3448 | __kmp_push_sync(gtid: global_tid, ct: ct_critical, ident: loc, name: lck, __kmp_user_lock_seq); |
3449 | } |
3450 | KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); |
3451 | } else { |
3452 | kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); |
3453 | lck = ilk->lock; |
3454 | KMP_DEBUG_ASSERT(lck != NULL); |
3455 | if (__kmp_env_consistency_check) { |
3456 | __kmp_push_sync(gtid: global_tid, ct: ct_critical, ident: loc, name: lck, __kmp_user_lock_seq); |
3457 | } |
3458 | KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); |
3459 | } |
3460 | |
3461 | #else // KMP_USE_DYNAMIC_LOCK |
3462 | |
3463 | // We know that the fast reduction code is only emitted by Intel compilers |
3464 | // with 32 byte critical sections. If there isn't enough space, then we |
3465 | // have to use a pointer. |
3466 | if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { |
3467 | lck = (kmp_user_lock_p)crit; |
3468 | } else { |
3469 | lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); |
3470 | } |
3471 | KMP_DEBUG_ASSERT(lck != NULL); |
3472 | |
3473 | if (__kmp_env_consistency_check) |
3474 | __kmp_push_sync(global_tid, ct_critical, loc, lck); |
3475 | |
3476 | __kmp_acquire_user_lock_with_checks(lck, global_tid); |
3477 | |
3478 | #endif // KMP_USE_DYNAMIC_LOCK |
3479 | } |
3480 | |
3481 | // used in a critical section reduce block |
3482 | static __forceinline void |
3483 | __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, |
3484 | kmp_critical_name *crit) { |
3485 | |
3486 | kmp_user_lock_p lck; |
3487 | |
3488 | #if KMP_USE_DYNAMIC_LOCK |
3489 | |
3490 | if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { |
3491 | lck = (kmp_user_lock_p)crit; |
3492 | if (__kmp_env_consistency_check) |
3493 | __kmp_pop_sync(gtid: global_tid, ct: ct_critical, ident: loc); |
3494 | KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); |
3495 | } else { |
3496 | kmp_indirect_lock_t *ilk = |
3497 | (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); |
3498 | if (__kmp_env_consistency_check) |
3499 | __kmp_pop_sync(gtid: global_tid, ct: ct_critical, ident: loc); |
3500 | KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); |
3501 | } |
3502 | |
3503 | #else // KMP_USE_DYNAMIC_LOCK |
3504 | |
3505 | // We know that the fast reduction code is only emitted by Intel compilers |
3506 | // with 32 byte critical sections. If there isn't enough space, then we have |
3507 | // to use a pointer. |
3508 | if (__kmp_base_user_lock_size > 32) { |
3509 | lck = *((kmp_user_lock_p *)crit); |
3510 | KMP_ASSERT(lck != NULL); |
3511 | } else { |
3512 | lck = (kmp_user_lock_p)crit; |
3513 | } |
3514 | |
3515 | if (__kmp_env_consistency_check) |
3516 | __kmp_pop_sync(global_tid, ct_critical, loc); |
3517 | |
3518 | __kmp_release_user_lock_with_checks(lck, global_tid); |
3519 | |
3520 | #endif // KMP_USE_DYNAMIC_LOCK |
3521 | } // __kmp_end_critical_section_reduce_block |
3522 | |
3523 | static __forceinline int |
3524 | __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, |
3525 | int *task_state) { |
3526 | kmp_team_t *team; |
3527 | |
  // Check if we are inside a teams construct.
3529 | if (th->th.th_teams_microtask) { |
3530 | *team_p = team = th->th.th_team; |
3531 | if (team->t.t_level == th->th.th_teams_level) { |
3532 | // This is reduction at teams construct. |
3533 | KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 |
3534 | // Let's swap teams temporarily for the reduction. |
3535 | th->th.th_info.ds.ds_tid = team->t.t_master_tid; |
3536 | th->th.th_team = team->t.t_parent; |
3537 | th->th.th_team_nproc = th->th.th_team->t.t_nproc; |
3538 | th->th.th_task_team = th->th.th_team->t.t_task_team[0]; |
3539 | *task_state = th->th.th_task_state; |
3540 | th->th.th_task_state = 0; |
3541 | |
3542 | return 1; |
3543 | } |
3544 | } |
3545 | return 0; |
3546 | } |
3547 | |
3548 | static __forceinline void |
3549 | __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { |
3550 | // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. |
3551 | th->th.th_info.ds.ds_tid = 0; |
3552 | th->th.th_team = team; |
3553 | th->th.th_team_nproc = team->t.t_nproc; |
3554 | th->th.th_task_team = team->t.t_task_team[task_state]; |
3555 | __kmp_type_convert(src: task_state, dest: &(th->th.th_task_state)); |
3556 | } |
3557 | |
3558 | /* 2.a.i. Reduce Block without a terminating barrier */ |
3559 | /*! |
3560 | @ingroup SYNCHRONIZATION |
3561 | @param loc source location information |
3562 | @param global_tid global thread number |
3563 | @param num_vars number of items (variables) to be reduced |
3564 | @param reduce_size size of data in bytes to be reduced |
3565 | @param reduce_data pointer to data to be reduced |
3566 | @param reduce_func callback function providing reduction operation on two |
3567 | operands and returning result of reduction in lhs_data |
3568 | @param lck pointer to the unique lock data structure |
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction is needed
3571 | |
3572 | The nowait version is used for a reduce clause with the nowait argument. |
3573 | */ |
3574 | kmp_int32 |
3575 | __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, |
3576 | size_t reduce_size, void *reduce_data, |
3577 | void (*reduce_func)(void *lhs_data, void *rhs_data), |
3578 | kmp_critical_name *lck) { |
3579 | |
3580 | KMP_COUNT_BLOCK(REDUCE_nowait); |
3581 | int retval = 0; |
3582 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3583 | kmp_info_t *th; |
3584 | kmp_team_t *team; |
3585 | int teams_swapped = 0, task_state; |
3586 | KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n" , global_tid)); |
3587 | __kmp_assert_valid_gtid(gtid: global_tid); |
3588 | |
  // why do we need this initialization here at all?
  // A reduction clause cannot be used as a stand-alone directive.
3591 | |
3592 | // do not call __kmp_serial_initialize(), it will be called by |
3593 | // __kmp_parallel_initialize() if needed |
3594 | // possible detection of false-positive race by the threadchecker ??? |
3595 | if (!TCR_4(__kmp_init_parallel)) |
3596 | __kmp_parallel_initialize(); |
3597 | |
3598 | __kmp_resume_if_soft_paused(); |
3599 | |
3600 | // check correctness of reduce block nesting |
3601 | #if KMP_USE_DYNAMIC_LOCK |
3602 | if (__kmp_env_consistency_check) |
3603 | __kmp_push_sync(gtid: global_tid, ct: ct_reduce, ident: loc, NULL, 0); |
3604 | #else |
3605 | if (__kmp_env_consistency_check) |
3606 | __kmp_push_sync(global_tid, ct_reduce, loc, NULL); |
3607 | #endif |
3608 | |
3609 | th = __kmp_thread_from_gtid(gtid: global_tid); |
3610 | teams_swapped = __kmp_swap_teams_for_teams_reduction(th, team_p: &team, task_state: &task_state); |
3611 | |
3612 | // packed_reduction_method value will be reused by __kmp_end_reduce* function, |
3613 | // the value should be kept in a variable |
3614 | // the variable should be either a construct-specific or thread-specific |
3615 | // property, not a team specific property |
3616 | // (a thread can reach the next reduce block on the next construct, reduce |
3617 | // method may differ on the next construct) |
3618 | // an ident_t "loc" parameter could be used as a construct-specific property |
3619 | // (what if loc == 0?) |
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
3622 | // a thread-specific variable is better regarding two issues above (next |
3623 | // construct and extra syncs) |
3624 | // a thread-specific "th_local.reduction_method" variable is used currently |
  // each thread executes the 'determine' and 'set' lines (there is no need to
  // execute them on a single thread only, which avoids unnecessary extra syncs)
3627 | |
3628 | packed_reduction_method = __kmp_determine_reduction_method( |
3629 | loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); |
3630 | __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); |
3631 | |
3632 | OMPT_REDUCTION_DECL(th, global_tid); |
3633 | if (packed_reduction_method == critical_reduce_block) { |
3634 | |
3635 | OMPT_REDUCTION_BEGIN; |
3636 | |
3637 | __kmp_enter_critical_section_reduce_block(loc, global_tid, crit: lck); |
3638 | retval = 1; |
3639 | |
3640 | } else if (packed_reduction_method == empty_reduce_block) { |
3641 | |
3642 | OMPT_REDUCTION_BEGIN; |
3643 | |
3644 | // usage: if team size == 1, no synchronization is required ( Intel |
3645 | // platforms only ) |
3646 | retval = 1; |
3647 | |
3648 | } else if (packed_reduction_method == atomic_reduce_block) { |
3649 | |
3650 | retval = 2; |
3651 | |
    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (this is not ideal: the checking block has already been closed by this
    // 'pop', but the atomic operation has not been executed yet; it will
    // execute slightly later, literally on the next instruction)
3658 | if (__kmp_env_consistency_check) |
3659 | __kmp_pop_sync(gtid: global_tid, ct: ct_reduce, ident: loc); |
3660 | |
3661 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
3662 | tree_reduce_block)) { |
3663 | |
// AT: performance issue: a real barrier here
// AT: (if the primary thread is slow, other threads are blocked here waiting
// for the primary thread to come and release them)
// AT: (this is not what a customer might expect when specifying the NOWAIT
// clause)
// AT: (specifying NOWAIT won't result in improved performance; it will only
// be confusing to a customer)
// AT: another implementation of *barrier_gather*nowait() (or some other
// design) might go faster and be more in line with the sense of NOWAIT
// AT: TO DO: run the EPCC test and compare times
3673 | |
3674 | // this barrier should be invisible to a customer and to the threading profile |
3675 | // tool (it's neither a terminating barrier nor customer's code, it's |
3676 | // used for an internal purpose) |
3677 | #if OMPT_SUPPORT |
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary frame/return
    // addresses.
3682 | ompt_frame_t *ompt_frame; |
3683 | if (ompt_enabled.enabled) { |
3684 | __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL); |
3685 | if (ompt_frame->enter_frame.ptr == NULL) |
3686 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
3687 | } |
3688 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
3689 | #endif |
3690 | #if USE_ITT_NOTIFY |
3691 | __kmp_threads[global_tid]->th.th_ident = loc; |
3692 | #endif |
3693 | retval = |
3694 | __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), |
3695 | gtid: global_tid, FALSE, reduce_size, reduce_data, reduce: reduce_func); |
3696 | retval = (retval != 0) ? (0) : (1); |
3697 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3698 | if (ompt_enabled.enabled) { |
3699 | ompt_frame->enter_frame = ompt_data_none; |
3700 | } |
3701 | #endif |
3702 | |
3703 | // all other workers except primary thread should do this pop here |
3704 | // ( none of other workers will get to __kmpc_end_reduce_nowait() ) |
3705 | if (__kmp_env_consistency_check) { |
3706 | if (retval == 0) { |
3707 | __kmp_pop_sync(gtid: global_tid, ct: ct_reduce, ident: loc); |
3708 | } |
3709 | } |
3710 | |
3711 | } else { |
3712 | |
3713 | // should never reach this block |
3714 | KMP_ASSERT(0); // "unexpected method" |
3715 | } |
3716 | if (teams_swapped) { |
3717 | __kmp_restore_swapped_teams(th, team, task_state); |
3718 | } |
3719 | KA_TRACE( |
3720 | 10, |
3721 | ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n" , |
3722 | global_tid, packed_reduction_method, retval)); |
3723 | |
3724 | return retval; |
3725 | } |
3726 | |
3727 | /*! |
3728 | @ingroup SYNCHRONIZATION |
3729 | @param loc source location information |
3730 | @param global_tid global thread id. |
3731 | @param lck pointer to the unique lock data structure |
3732 | |
3733 | Finish the execution of a reduce nowait. |
3734 | */ |
3735 | void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, |
3736 | kmp_critical_name *lck) { |
3737 | |
3738 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3739 | |
3740 | KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n" , global_tid)); |
3741 | __kmp_assert_valid_gtid(gtid: global_tid); |
3742 | |
3743 | packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); |
3744 | |
3745 | OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); |
3746 | |
3747 | if (packed_reduction_method == critical_reduce_block) { |
3748 | |
3749 | __kmp_end_critical_section_reduce_block(loc, global_tid, crit: lck); |
3750 | OMPT_REDUCTION_END; |
3751 | |
3752 | } else if (packed_reduction_method == empty_reduce_block) { |
3753 | |
3754 | // usage: if team size == 1, no synchronization is required ( on Intel |
3755 | // platforms only ) |
3756 | |
3757 | OMPT_REDUCTION_END; |
3758 | |
3759 | } else if (packed_reduction_method == atomic_reduce_block) { |
3760 | |
    // neither the primary thread nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it would be better to remove this else-if entirely; after
    // removal this value would be caught by the 'else' branch and assert
3765 | |
3766 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
3767 | tree_reduce_block)) { |
3768 | |
3769 | // only primary thread gets here |
3770 | // OMPT: tree reduction is annotated in the barrier code |
3771 | |
3772 | } else { |
3773 | |
3774 | // should never reach this block |
3775 | KMP_ASSERT(0); // "unexpected method" |
3776 | } |
3777 | |
3778 | if (__kmp_env_consistency_check) |
3779 | __kmp_pop_sync(gtid: global_tid, ct: ct_reduce, ident: loc); |
3780 | |
3781 | KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n" , |
3782 | global_tid, packed_reduction_method)); |
3783 | |
3784 | return; |
3785 | } |
3786 | |
3787 | /* 2.a.ii. Reduce Block with a terminating barrier */ |
3788 | |
3789 | /*! |
3790 | @ingroup SYNCHRONIZATION |
3791 | @param loc source location information |
3792 | @param global_tid global thread number |
3793 | @param num_vars number of items (variables) to be reduced |
3794 | @param reduce_size size of data in bytes to be reduced |
3795 | @param reduce_data pointer to data to be reduced |
3796 | @param reduce_func callback function providing reduction operation on two |
3797 | operands and returning result of reduction in lhs_data |
3798 | @param lck pointer to the unique lock data structure |
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction is needed
3801 | |
3802 | A blocking reduce that includes an implicit barrier. |
3803 | */ |
3804 | kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, |
3805 | size_t reduce_size, void *reduce_data, |
3806 | void (*reduce_func)(void *lhs_data, void *rhs_data), |
3807 | kmp_critical_name *lck) { |
3808 | KMP_COUNT_BLOCK(REDUCE_wait); |
3809 | int retval = 0; |
3810 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3811 | kmp_info_t *th; |
3812 | kmp_team_t *team; |
3813 | int teams_swapped = 0, task_state; |
3814 | |
3815 | KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n" , global_tid)); |
3816 | __kmp_assert_valid_gtid(gtid: global_tid); |
3817 | |
  // why do we need this initialization here at all?
  // A reduction clause cannot be a stand-alone directive.
3820 | |
3821 | // do not call __kmp_serial_initialize(), it will be called by |
3822 | // __kmp_parallel_initialize() if needed |
3823 | // possible detection of false-positive race by the threadchecker ??? |
3824 | if (!TCR_4(__kmp_init_parallel)) |
3825 | __kmp_parallel_initialize(); |
3826 | |
3827 | __kmp_resume_if_soft_paused(); |
3828 | |
3829 | // check correctness of reduce block nesting |
3830 | #if KMP_USE_DYNAMIC_LOCK |
3831 | if (__kmp_env_consistency_check) |
3832 | __kmp_push_sync(gtid: global_tid, ct: ct_reduce, ident: loc, NULL, 0); |
3833 | #else |
3834 | if (__kmp_env_consistency_check) |
3835 | __kmp_push_sync(global_tid, ct_reduce, loc, NULL); |
3836 | #endif |
3837 | |
3838 | th = __kmp_thread_from_gtid(gtid: global_tid); |
3839 | teams_swapped = __kmp_swap_teams_for_teams_reduction(th, team_p: &team, task_state: &task_state); |
3840 | |
3841 | packed_reduction_method = __kmp_determine_reduction_method( |
3842 | loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); |
3843 | __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); |
3844 | |
3845 | OMPT_REDUCTION_DECL(th, global_tid); |
3846 | |
3847 | if (packed_reduction_method == critical_reduce_block) { |
3848 | |
3849 | OMPT_REDUCTION_BEGIN; |
3850 | __kmp_enter_critical_section_reduce_block(loc, global_tid, crit: lck); |
3851 | retval = 1; |
3852 | |
3853 | } else if (packed_reduction_method == empty_reduce_block) { |
3854 | |
3855 | OMPT_REDUCTION_BEGIN; |
3856 | // usage: if team size == 1, no synchronization is required ( Intel |
3857 | // platforms only ) |
3858 | retval = 1; |
3859 | |
3860 | } else if (packed_reduction_method == atomic_reduce_block) { |
3861 | |
3862 | retval = 2; |
3863 | |
3864 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
3865 | tree_reduce_block)) { |
3866 | |
3867 | // case tree_reduce_block: |
3868 | // this barrier should be visible to a customer and to the threading profile |
3869 | // tool (it's a terminating barrier on constructs if NOWAIT not specified) |
3870 | #if OMPT_SUPPORT |
3871 | ompt_frame_t *ompt_frame; |
3872 | if (ompt_enabled.enabled) { |
3873 | __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL); |
3874 | if (ompt_frame->enter_frame.ptr == NULL) |
3875 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
3876 | } |
3877 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
3878 | #endif |
3879 | #if USE_ITT_NOTIFY |
3880 | __kmp_threads[global_tid]->th.th_ident = |
3881 | loc; // needed for correct notification of frames |
3882 | #endif |
3883 | retval = |
3884 | __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), |
3885 | gtid: global_tid, TRUE, reduce_size, reduce_data, reduce: reduce_func); |
3886 | retval = (retval != 0) ? (0) : (1); |
3887 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3888 | if (ompt_enabled.enabled) { |
3889 | ompt_frame->enter_frame = ompt_data_none; |
3890 | } |
3891 | #endif |
3892 | |
3893 | // all other workers except primary thread should do this pop here |
3894 | // (none of other workers except primary will enter __kmpc_end_reduce()) |
3895 | if (__kmp_env_consistency_check) { |
3896 | if (retval == 0) { // 0: all other workers; 1: primary thread |
3897 | __kmp_pop_sync(gtid: global_tid, ct: ct_reduce, ident: loc); |
3898 | } |
3899 | } |
3900 | |
3901 | } else { |
3902 | |
3903 | // should never reach this block |
3904 | KMP_ASSERT(0); // "unexpected method" |
3905 | } |
3906 | if (teams_swapped) { |
3907 | __kmp_restore_swapped_teams(th, team, task_state); |
3908 | } |
3909 | |
3910 | KA_TRACE(10, |
3911 | ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n" , |
3912 | global_tid, packed_reduction_method, retval)); |
3913 | return retval; |
3914 | } |
3915 | |
3916 | /*! |
3917 | @ingroup SYNCHRONIZATION |
3918 | @param loc source location information |
3919 | @param global_tid global thread id. |
3920 | @param lck pointer to the unique lock data structure |
3921 | |
3922 | Finish the execution of a blocking reduce. |
3923 | The <tt>lck</tt> pointer must be the same as that used in the corresponding |
3924 | start function. |
3925 | */ |
3926 | void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, |
3927 | kmp_critical_name *lck) { |
3928 | |
3929 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3930 | kmp_info_t *th; |
3931 | kmp_team_t *team; |
3932 | int teams_swapped = 0, task_state; |
3933 | |
3934 | KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n" , global_tid)); |
3935 | __kmp_assert_valid_gtid(gtid: global_tid); |
3936 | |
3937 | th = __kmp_thread_from_gtid(gtid: global_tid); |
3938 | teams_swapped = __kmp_swap_teams_for_teams_reduction(th, team_p: &team, task_state: &task_state); |
3939 | |
3940 | packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); |
3941 | |
3942 | // this barrier should be visible to a customer and to the threading profile |
3943 | // tool (it's a terminating barrier on constructs if NOWAIT not specified) |
3944 | OMPT_REDUCTION_DECL(th, global_tid); |
3945 | |
3946 | if (packed_reduction_method == critical_reduce_block) { |
3947 | __kmp_end_critical_section_reduce_block(loc, global_tid, crit: lck); |
3948 | |
3949 | OMPT_REDUCTION_END; |
3950 | |
3951 | // TODO: implicit barrier: should be exposed |
3952 | #if OMPT_SUPPORT |
3953 | ompt_frame_t *ompt_frame; |
3954 | if (ompt_enabled.enabled) { |
3955 | __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL); |
3956 | if (ompt_frame->enter_frame.ptr == NULL) |
3957 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
3958 | } |
3959 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
3960 | #endif |
3961 | #if USE_ITT_NOTIFY |
3962 | __kmp_threads[global_tid]->th.th_ident = loc; |
3963 | #endif |
3964 | __kmp_barrier(bt: bs_plain_barrier, gtid: global_tid, FALSE, reduce_size: 0, NULL, NULL); |
3965 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3966 | if (ompt_enabled.enabled) { |
3967 | ompt_frame->enter_frame = ompt_data_none; |
3968 | } |
3969 | #endif |
3970 | |
3971 | } else if (packed_reduction_method == empty_reduce_block) { |
3972 | |
3973 | OMPT_REDUCTION_END; |
3974 | |
3975 | // usage: if team size==1, no synchronization is required (Intel platforms only) |
3976 | |
3977 | // TODO: implicit barrier: should be exposed |
3978 | #if OMPT_SUPPORT |
3979 | ompt_frame_t *ompt_frame; |
3980 | if (ompt_enabled.enabled) { |
3981 | __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL); |
3982 | if (ompt_frame->enter_frame.ptr == NULL) |
3983 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
3984 | } |
3985 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
3986 | #endif |
3987 | #if USE_ITT_NOTIFY |
3988 | __kmp_threads[global_tid]->th.th_ident = loc; |
3989 | #endif |
3990 | __kmp_barrier(bt: bs_plain_barrier, gtid: global_tid, FALSE, reduce_size: 0, NULL, NULL); |
3991 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3992 | if (ompt_enabled.enabled) { |
3993 | ompt_frame->enter_frame = ompt_data_none; |
3994 | } |
3995 | #endif |
3996 | |
3997 | } else if (packed_reduction_method == atomic_reduce_block) { |
3998 | |
3999 | #if OMPT_SUPPORT |
4000 | ompt_frame_t *ompt_frame; |
4001 | if (ompt_enabled.enabled) { |
4002 | __ompt_get_task_info_internal(ancestor_level: 0, NULL, NULL, task_frame: &ompt_frame, NULL, NULL); |
4003 | if (ompt_frame->enter_frame.ptr == NULL) |
4004 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
4005 | } |
4006 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
4007 | #endif |
4008 | // TODO: implicit barrier: should be exposed |
4009 | #if USE_ITT_NOTIFY |
4010 | __kmp_threads[global_tid]->th.th_ident = loc; |
4011 | #endif |
4012 | __kmp_barrier(bt: bs_plain_barrier, gtid: global_tid, FALSE, reduce_size: 0, NULL, NULL); |
4013 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4014 | if (ompt_enabled.enabled) { |
4015 | ompt_frame->enter_frame = ompt_data_none; |
4016 | } |
4017 | #endif |
4018 | |
4019 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
4020 | tree_reduce_block)) { |
4021 | |
4022 | // only primary thread executes here (primary releases all other workers) |
4023 | __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), |
4024 | gtid: global_tid); |
4025 | |
4026 | } else { |
4027 | |
4028 | // should never reach this block |
4029 | KMP_ASSERT(0); // "unexpected method" |
4030 | } |
4031 | if (teams_swapped) { |
4032 | __kmp_restore_swapped_teams(th, team, task_state); |
4033 | } |
4034 | |
4035 | if (__kmp_env_consistency_check) |
4036 | __kmp_pop_sync(gtid: global_tid, ct: ct_reduce, ident: loc); |
4037 | |
4038 | KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n" , |
4039 | global_tid, packed_reduction_method)); |
4040 | |
4041 | return; |
4042 | } |
4043 | |
4044 | #undef __KMP_GET_REDUCTION_METHOD |
4045 | #undef __KMP_SET_REDUCTION_METHOD |
4046 | |
4047 | /* end of interface to fast scalable reduce routines */ |
4048 | |
4049 | kmp_uint64 __kmpc_get_taskid() { |
4050 | |
4051 | kmp_int32 gtid; |
4052 | kmp_info_t *thread; |
4053 | |
4054 | gtid = __kmp_get_gtid(); |
4055 | if (gtid < 0) { |
4056 | return 0; |
4057 | } |
4058 | thread = __kmp_thread_from_gtid(gtid); |
4059 | return thread->th.th_current_task->td_task_id; |
4060 | |
4061 | } // __kmpc_get_taskid |
4062 | |
4063 | kmp_uint64 __kmpc_get_parent_taskid() { |
4064 | |
4065 | kmp_int32 gtid; |
4066 | kmp_info_t *thread; |
4067 | kmp_taskdata_t *parent_task; |
4068 | |
4069 | gtid = __kmp_get_gtid(); |
4070 | if (gtid < 0) { |
4071 | return 0; |
4072 | } |
4073 | thread = __kmp_thread_from_gtid(gtid); |
4074 | parent_task = thread->th.th_current_task->td_parent; |
4075 | return (parent_task == NULL ? 0 : parent_task->td_task_id); |
4076 | |
4077 | } // __kmpc_get_parent_taskid |
4078 | |
4079 | /*! |
4080 | @ingroup WORK_SHARING |
4081 | @param loc source location information. |
4082 | @param gtid global thread number. |
4083 | @param num_dims number of associated doacross loops. |
4084 | @param dims info on loops bounds. |
4085 | |
Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for the loop for(i=2;i<9;i+=2): lo=2, up=8, st=2.
4089 | */ |
4090 | void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, |
4091 | const struct kmp_dim *dims) { |
4092 | __kmp_assert_valid_gtid(gtid); |
4093 | int j, idx; |
4094 | kmp_int64 last, trace_count; |
4095 | kmp_info_t *th = __kmp_threads[gtid]; |
4096 | kmp_team_t *team = th->th.th_team; |
4097 | kmp_uint32 *flags; |
4098 | kmp_disp_t *pr_buf = th->th.th_dispatch; |
4099 | dispatch_shared_info_t *sh_buf; |
4100 | |
4101 | KA_TRACE( |
4102 | 20, |
4103 | ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n" , |
4104 | gtid, num_dims, !team->t.t_serialized)); |
4105 | KMP_DEBUG_ASSERT(dims != NULL); |
4106 | KMP_DEBUG_ASSERT(num_dims > 0); |
4107 | |
4108 | if (team->t.t_serialized) { |
4109 | KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n" )); |
4110 | return; // no dependencies if team is serialized |
4111 | } |
4112 | KMP_DEBUG_ASSERT(team->t.t_nproc > 1); |
4113 | idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for |
4114 | // the next loop |
4115 | sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; |
4116 | |
4117 | // Save bounds info into allocated private buffer |
4118 | KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); |
4119 | pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( |
4120 | th, sizeof(kmp_int64) * (4 * num_dims + 1)); |
4121 | KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); |
4122 | pr_buf->th_doacross_info[0] = |
4123 | (kmp_int64)num_dims; // first element is number of dimensions |
4124 | // Save also address of num_done in order to access it later without knowing |
4125 | // the buffer index |
4126 | pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; |
4127 | pr_buf->th_doacross_info[2] = dims[0].lo; |
4128 | pr_buf->th_doacross_info[3] = dims[0].up; |
4129 | pr_buf->th_doacross_info[4] = dims[0].st; |
4130 | last = 5; |
4131 | for (j = 1; j < num_dims; ++j) { |
4132 | kmp_int64 |
4133 | range_length; // To keep ranges of all dimensions but the first dims[0] |
4134 | if (dims[j].st == 1) { // most common case |
4135 | // AC: should we care of ranges bigger than LLONG_MAX? (not for now) |
4136 | range_length = dims[j].up - dims[j].lo + 1; |
4137 | } else { |
4138 | if (dims[j].st > 0) { |
4139 | KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); |
4140 | range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; |
4141 | } else { // negative increment |
4142 | KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); |
4143 | range_length = |
4144 | (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; |
4145 | } |
4146 | } |
4147 | pr_buf->th_doacross_info[last++] = range_length; |
4148 | pr_buf->th_doacross_info[last++] = dims[j].lo; |
4149 | pr_buf->th_doacross_info[last++] = dims[j].up; |
4150 | pr_buf->th_doacross_info[last++] = dims[j].st; |
4151 | } |
4152 | |
4153 | // Compute total trip count. |
4154 | // Start with range of dims[0] which we don't need to keep in the buffer. |
4155 | if (dims[0].st == 1) { // most common case |
4156 | trace_count = dims[0].up - dims[0].lo + 1; |
4157 | } else if (dims[0].st > 0) { |
4158 | KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); |
4159 | trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; |
4160 | } else { // negative increment |
4161 | KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); |
4162 | trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; |
4163 | } |
4164 | for (j = 1; j < num_dims; ++j) { |
4165 | trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges |
4166 | } |
4167 | KMP_DEBUG_ASSERT(trace_count > 0); |
4168 | |
  // Check if the shared buffer is still occupied by another loop (the one
  // that used buffer index idx - __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
4173 | __kmp_wait_4(spinner: (volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, checker: idx, |
4174 | pred: __kmp_eq_4, NULL); |
4175 | } |
4176 | #if KMP_32_BIT_ARCH |
4177 | // Check if we are the first thread. After the CAS the first thread gets 0, |
4178 | // others get 1 if initialization is in progress, allocated pointer otherwise. |
4179 | // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. |
4180 | flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( |
4181 | (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); |
4182 | #else |
4183 | flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( |
4184 | (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); |
4185 | #endif |
4186 | if (flags == NULL) { |
4187 | // we are the first thread, allocate the array of flags |
4188 | size_t size = |
4189 | (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration |
4190 | flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); |
4191 | KMP_MB(); |
4192 | sh_buf->doacross_flags = flags; |
4193 | } else if (flags == (kmp_uint32 *)1) { |
4194 | #if KMP_32_BIT_ARCH |
4195 | // initialization is still in progress, need to wait |
4196 | while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) |
4197 | #else |
4198 | while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) |
4199 | #endif |
4200 | KMP_YIELD(TRUE); |
4201 | KMP_MB(); |
4202 | } else { |
4203 | KMP_MB(); |
4204 | } |
4205 | KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value |
4206 | pr_buf->th_doacross_flags = |
4207 | sh_buf->doacross_flags; // save private copy in order to not |
4208 | // touch shared buffer on each iteration |
4209 | KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n" , gtid)); |
4210 | } |
4211 | |
4212 | void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { |
4213 | __kmp_assert_valid_gtid(gtid); |
4214 | kmp_int64 shft; |
4215 | size_t num_dims, i; |
4216 | kmp_uint32 flag; |
4217 | kmp_int64 iter_number; // iteration number of "collapsed" loop nest |
4218 | kmp_info_t *th = __kmp_threads[gtid]; |
4219 | kmp_team_t *team = th->th.th_team; |
4220 | kmp_disp_t *pr_buf; |
4221 | kmp_int64 lo, up, st; |
4222 | |
4223 | KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n" , gtid)); |
4224 | if (team->t.t_serialized) { |
4225 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n" )); |
4226 | return; // no dependencies if team is serialized |
4227 | } |
4228 | |
4229 | // calculate sequential iteration number and check out-of-bounds condition |
4230 | pr_buf = th->th.th_dispatch; |
4231 | KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); |
4232 | num_dims = (size_t)pr_buf->th_doacross_info[0]; |
4233 | lo = pr_buf->th_doacross_info[2]; |
4234 | up = pr_buf->th_doacross_info[3]; |
4235 | st = pr_buf->th_doacross_info[4]; |
4236 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4237 | SimpleVLA<ompt_dependence_t> deps(num_dims); |
4238 | #endif |
4239 | if (st == 1) { // most common case |
4240 | if (vec[0] < lo || vec[0] > up) { |
4241 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4242 | "bounds [%lld,%lld]\n" , |
4243 | gtid, vec[0], lo, up)); |
4244 | return; |
4245 | } |
4246 | iter_number = vec[0] - lo; |
4247 | } else if (st > 0) { |
4248 | if (vec[0] < lo || vec[0] > up) { |
4249 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4250 | "bounds [%lld,%lld]\n" , |
4251 | gtid, vec[0], lo, up)); |
4252 | return; |
4253 | } |
4254 | iter_number = (kmp_uint64)(vec[0] - lo) / st; |
4255 | } else { // negative increment |
4256 | if (vec[0] > lo || vec[0] < up) { |
4257 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4258 | "bounds [%lld,%lld]\n" , |
4259 | gtid, vec[0], lo, up)); |
4260 | return; |
4261 | } |
4262 | iter_number = (kmp_uint64)(lo - vec[0]) / (-st); |
4263 | } |
4264 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4265 | deps[0].variable.value = iter_number; |
4266 | deps[0].dependence_type = ompt_dependence_type_sink; |
4267 | #endif |
4268 | for (i = 1; i < num_dims; ++i) { |
4269 | kmp_int64 iter, ln; |
4270 | size_t j = i * 4; |
4271 | ln = pr_buf->th_doacross_info[j + 1]; |
4272 | lo = pr_buf->th_doacross_info[j + 2]; |
4273 | up = pr_buf->th_doacross_info[j + 3]; |
4274 | st = pr_buf->th_doacross_info[j + 4]; |
4275 | if (st == 1) { |
4276 | if (vec[i] < lo || vec[i] > up) { |
4277 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4278 | "bounds [%lld,%lld]\n" , |
4279 | gtid, vec[i], lo, up)); |
4280 | return; |
4281 | } |
4282 | iter = vec[i] - lo; |
4283 | } else if (st > 0) { |
4284 | if (vec[i] < lo || vec[i] > up) { |
4285 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4286 | "bounds [%lld,%lld]\n" , |
4287 | gtid, vec[i], lo, up)); |
4288 | return; |
4289 | } |
4290 | iter = (kmp_uint64)(vec[i] - lo) / st; |
4291 | } else { // st < 0 |
4292 | if (vec[i] > lo || vec[i] < up) { |
4293 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4294 | "bounds [%lld,%lld]\n" , |
4295 | gtid, vec[i], lo, up)); |
4296 | return; |
4297 | } |
4298 | iter = (kmp_uint64)(lo - vec[i]) / (-st); |
4299 | } |
4300 | iter_number = iter + ln * iter_number; |
4301 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4302 | deps[i].variable.value = iter; |
4303 | deps[i].dependence_type = ompt_dependence_type_sink; |
4304 | #endif |
4305 | } |
4306 | shft = iter_number % 32; // use 32-bit granularity |
4307 | iter_number >>= 5; // divided by 32 |
4308 | flag = 1 << shft; |
4309 | while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { |
4310 | KMP_YIELD(TRUE); |
4311 | } |
4312 | KMP_MB(); |
4313 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4314 | if (ompt_enabled.ompt_callback_dependences) { |
4315 | ompt_callbacks.ompt_callback(ompt_callback_dependences)( |
4316 | &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); |
4317 | } |
4318 | #endif |
4319 | KA_TRACE(20, |
4320 | ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n" , |
4321 | gtid, (iter_number << 5) + shft)); |
4322 | } |

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
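
// Illustrative sketch (editorial addition; actual codegen is compiler-specific
// and may differ): how a doacross loop is expected to be lowered onto the
// entry points above. __kmpc_doacross_init() is defined earlier in this file;
// its dims argument describes lower bound, upper bound and stride per
// dimension.
//
//   // user code:
//   //   #pragma omp for ordered(2)
//   //   for (int i = 1; i < n; ++i)
//   //     for (int j = 1; j < m; ++j) {
//   //       #pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
//   //       a[i][j] = f(a[i - 1][j], a[i][j - 1]);
//   //       #pragma omp ordered depend(source)
//   //     }
//   //
//   // conceptual lowering, per thread (simplified):
//   //   __kmpc_doacross_init(loc, gtid, 2, dims);
//   //   for each assigned iteration (i, j):
//   //     kmp_int64 sink1[2] = {i - 1, j};
//   //     kmp_int64 sink2[2] = {i, j - 1};
//   //     __kmpc_doacross_wait(loc, gtid, sink1); // one call per sink vector
//   //     __kmpc_doacross_wait(loc, gtid, sink2);
//   //     ... loop body ...
//   //     kmp_int64 src[2] = {i, j};
//   //     __kmpc_doacross_post(loc, gtid, src);
//   //   __kmpc_doacross_fini(loc, gtid);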

/* OpenMP 5.1 Memory Management routines */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
/* end of OpenMP 5.1 Memory Management routines */
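
// Illustrative usage sketch (editorial addition): how application code is
// expected to call the OpenMP memory-management API implemented above. The
// predefined allocator handle omp_default_mem_alloc comes from omp.h.
//
//   #include <omp.h>
//
//   void example(void) {
//     // 1024 doubles, 64-byte aligned, from the default memory space
//     double *buf = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
//                                               omp_default_mem_alloc);
//     if (buf) {
//       // ... use buf ...
//       omp_free(buf, omp_default_mem_alloc); // free with the same allocator
//     }
//   }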

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
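
// Illustrative note (editorial addition; the user-level entry points are
// implemented elsewhere in the runtime): __kmpc_pause_resource() backs the
// OpenMP 5.0 pause API from omp.h. A minimal sketch of how an application
// might release runtime resources between parallel phases:
//
//   #include <omp.h>
//
//   void between_phases(void) {
//     // soft pause: worker resources may be released, runtime state is kept;
//     // the next parallel region re-initializes transparently as needed
//     omp_pause_resource_all(omp_pause_soft);
//   }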

void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}
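
// Illustrative sketch (editorial addition; assumption about compiler
// behavior): __kmpc_error() is the runtime entry point behind the OpenMP 5.1
// `error` directive with execution-time semantics, e.g.:
//
//   // user code:
//   //   #pragma omp error at(execution) severity(warning) message("fallback")
//   //
//   // conceptual lowering:
//   //   __kmpc_error(&loc, severity_warning, "fallback");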

// Mark the beginning of a scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of the scope directive and is not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark the end of a scope directive.
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of the scope directive and is not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}
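
// Illustrative sketch (editorial addition; assumption about compiler
// behavior): __kmpc_scope() / __kmpc_end_scope() bracket an OpenMP 5.1 `scope`
// region so OMPT tools can observe it as ompt_work_scope, e.g.:
//
//   // user code:
//   //   #pragma omp parallel
//   //   {
//   //     #pragma omp scope nowait
//   //     { do_block(); }
//   //   }
//   //
//   // conceptual lowering of the scope region, per thread:
//   //   __kmpc_scope(&loc, gtid, NULL);
//   //   do_block();
//   //   __kmpc_end_scope(&loc, gtid, NULL);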

#ifdef KMP_USE_VERSION_SYMBOLS
// For GOMP compatibility there are two versions of each omp_* API.
// One is the plain C symbol and one is the Fortran symbol with an appended
// underscore. When we implement a specific ompc_* version of an omp_*
// function, we want the plain GOMP versioned symbol to alias the ompc_*
// version instead of the Fortran versions in kmp_ftn_entry.h.
extern "C" {
// Have to undef these from omp.h so they aren't translated into
// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below.
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
} // extern "C"
#endif
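
// Illustrative sketch (editorial addition; an assumption, not the actual
// macro definition): on ELF targets a versioned alias like the ones above is
// typically produced with a .symver directive, conceptually:
//
//   // make ompc_set_affinity_format the default-versioned implementation of
//   // omp_set_affinity_format under version node OMP_5.0
//   __asm__(".symver ompc_set_affinity_format,"
//           "omp_set_affinity_format@@OMP_5.0");
//
// The real KMP_VERSION_OMPC_SYMBOL macro is defined elsewhere in the runtime
// and also covers the build configurations without version symbols.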