1 | /* |
2 | * kmp_csupport.cpp -- kfront linkage support for OpenMP. |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #define __KMP_IMP |
14 | #include "omp.h" /* extern "C" declarations of user-visible routines */ |
15 | #include "kmp.h" |
16 | #include "kmp_error.h" |
17 | #include "kmp_i18n.h" |
18 | #include "kmp_itt.h" |
19 | #include "kmp_lock.h" |
20 | #include "kmp_stats.h" |
21 | #include "kmp_utils.h" |
22 | #include "ompt-specific.h" |
23 | |
24 | #define MAX_MESSAGE 512 |
25 | |
26 | // flags will be used in future, e.g. to implement openmp_strict library |
27 | // restrictions |
28 | |
29 | /*! |
30 | * @ingroup STARTUP_SHUTDOWN |
31 | * @param loc in source location information |
32 | * @param flags in for future use (currently ignored) |
33 | * |
34 | * Initialize the runtime library. This call is optional; if it is not made then |
35 | * it will be implicitly called by attempts to use other library functions. |
36 | */ |
37 | void __kmpc_begin(ident_t *loc, kmp_int32 flags) { |
  // By default __kmpc_begin() is a no-op.
39 | char *env; |
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
42 | __kmp_middle_initialize(); |
43 | __kmp_assign_root_init_mask(); |
44 | KC_TRACE(10, ("__kmpc_begin: middle initialization called\n" )); |
45 | } else if (__kmp_ignore_mppbeg() == FALSE) { |
46 | // By default __kmp_ignore_mppbeg() returns TRUE. |
47 | __kmp_internal_begin(); |
48 | KC_TRACE(10, ("__kmpc_begin: called\n" )); |
49 | } |
50 | } |
51 | |
52 | /*! |
53 | * @ingroup STARTUP_SHUTDOWN |
54 | * @param loc source location information |
55 | * |
 * Shut down the runtime library. This is also optional, and even if called it will
57 | * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to |
58 | * zero. |
59 | */ |
60 | void __kmpc_end(ident_t *loc) { |
  // By default, __kmp_ignore_mppend() returns TRUE, which makes __kmpc_end()
  // a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
63 | // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() |
64 | // returns FALSE and __kmpc_end() will unregister this root (it can cause |
65 | // library shut down). |
66 | if (__kmp_ignore_mppend() == FALSE) { |
67 | KC_TRACE(10, ("__kmpc_end: called\n" )); |
68 | KA_TRACE(30, ("__kmpc_end\n" )); |
69 | |
    __kmp_internal_end_thread(-1);
71 | } |
72 | #if KMP_OS_WINDOWS && OMPT_SUPPORT |
73 | // Normal exit process on Windows does not allow worker threads of the final |
74 | // parallel region to finish reporting their events, so shutting down the |
75 | // library here fixes the issue at least for the cases where __kmpc_end() is |
76 | // placed properly. |
77 | if (ompt_enabled.enabled) |
78 | __kmp_internal_end_library(__kmp_gtid_get_specific()); |
79 | #endif |
80 | } |
81 | |
82 | /*! |
83 | @ingroup THREAD_STATES |
84 | @param loc Source location information. |
85 | @return The global thread index of the active thread. |
86 | |
87 | This function can be called in any context. |
88 | |
If the runtime has only been entered at the outermost level from a
90 | single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is |
91 | that which would be returned by omp_get_thread_num() in the outermost |
92 | active parallel construct. (Or zero if there is no active parallel |
93 | construct, since the primary thread is necessarily thread zero). |
94 | |
95 | If multiple non-OpenMP threads all enter an OpenMP construct then this |
96 | will be a unique thread identifier among all the threads created by |
97 | the OpenMP runtime (but the value cannot be defined in terms of |
98 | OpenMP thread ids returned by omp_get_thread_num()). |
99 | */ |
100 | kmp_int32 __kmpc_global_thread_num(ident_t *loc) { |
101 | kmp_int32 gtid = __kmp_entry_gtid(); |
102 | |
103 | KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n" , gtid)); |
104 | |
105 | return gtid; |
106 | } |
107 | |
108 | /*! |
109 | @ingroup THREAD_STATES |
110 | @param loc Source location information. |
111 | @return The number of threads under control of the OpenMP<sup>*</sup> runtime |
112 | |
113 | This function can be called in any context. |
114 | It returns the total number of threads under the control of the OpenMP runtime. |
115 | That is not a number that can be determined by any OpenMP standard calls, since |
116 | the library may be called from more than one non-OpenMP thread, and this |
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
120 | they are not waiting for work. |
121 | */ |
122 | kmp_int32 __kmpc_global_num_threads(ident_t *loc) { |
123 | KC_TRACE(10, |
124 | ("__kmpc_global_num_threads: num_threads = %d\n" , __kmp_all_nth)); |
125 | |
126 | return TCR_4(__kmp_all_nth); |
127 | } |
128 | |
129 | /*! |
130 | @ingroup THREAD_STATES |
131 | @param loc Source location information. |
132 | @return The thread number of the calling thread in the innermost active parallel |
133 | construct. |
134 | */ |
135 | kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { |
136 | KC_TRACE(10, ("__kmpc_bound_thread_num: called\n" )); |
137 | return __kmp_tid_from_gtid(__kmp_entry_gtid()); |
138 | } |
139 | |
140 | /*! |
141 | @ingroup THREAD_STATES |
142 | @param loc Source location information. |
143 | @return The number of threads in the innermost active parallel construct. |
144 | */ |
145 | kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { |
146 | KC_TRACE(10, ("__kmpc_bound_num_threads: called\n" )); |
147 | |
148 | return __kmp_entry_thread()->th.th_team->t.t_nproc; |
149 | } |
150 | |
151 | /*! |
152 | * @ingroup DEPRECATED |
153 | * @param loc location description |
154 | * |
155 | * This function need not be called. It always returns TRUE. |
156 | */ |
157 | kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { |
158 | #ifndef KMP_DEBUG |
159 | |
160 | return TRUE; |
161 | |
162 | #else |
163 | |
164 | const char *semi2; |
165 | const char *semi3; |
166 | int line_no; |
167 | |
168 | if (__kmp_par_range == 0) { |
169 | return TRUE; |
170 | } |
171 | semi2 = loc->psource; |
172 | if (semi2 == NULL) { |
173 | return TRUE; |
174 | } |
  semi2 = strchr(semi2, ';');
176 | if (semi2 == NULL) { |
177 | return TRUE; |
178 | } |
  semi2 = strchr(semi2 + 1, ';');
180 | if (semi2 == NULL) { |
181 | return TRUE; |
182 | } |
183 | if (__kmp_par_range_filename[0]) { |
184 | const char *name = semi2 - 1; |
185 | while ((name > loc->psource) && (*name != '/') && (*name != ';')) { |
186 | name--; |
187 | } |
188 | if ((*name == '/') || (*name == ';')) { |
189 | name++; |
190 | } |
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 | return __kmp_par_range < 0; |
193 | } |
194 | } |
  semi3 = strchr(semi2 + 1, ';');
196 | if (__kmp_par_range_routine[0]) { |
197 | if ((semi3 != NULL) && (semi3 > semi2) && |
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 | return __kmp_par_range < 0; |
200 | } |
201 | } |
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 | if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { |
204 | return __kmp_par_range > 0; |
205 | } |
206 | return __kmp_par_range < 0; |
207 | } |
208 | return TRUE; |
209 | |
210 | #endif /* KMP_DEBUG */ |
211 | } |
212 | |
213 | /*! |
214 | @ingroup THREAD_STATES |
215 | @param loc Source location information. |
216 | @return 1 if this thread is executing inside an active parallel region, zero if |
217 | not. |
218 | */ |
219 | kmp_int32 __kmpc_in_parallel(ident_t *loc) { |
220 | return __kmp_entry_thread()->th.th_root->r.r_active; |
221 | } |
222 | |
223 | /*! |
224 | @ingroup PARALLEL |
225 | @param loc source location information |
226 | @param global_tid global thread number |
227 | @param num_threads number of threads requested for this parallel construct |
228 | |
229 | Set the number of threads to be used by the next fork spawned by this thread. |
230 | This call is only required if the parallel construct has a `num_threads` clause. |
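
A hedged sketch of the typical lowering (illustrative only; the outlined
function name and the `loc` object are assumed to be compiler-generated):
@code
// #pragma omp parallel num_threads(4)
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_threads(&loc, gtid, 4);
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
@endcode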
231 | */ |
232 | void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, |
233 | kmp_int32 num_threads) { |
234 | KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n" , |
235 | global_tid, num_threads)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
238 | } |
239 | |
240 | void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid, |
241 | kmp_int32 num_threads, int severity, |
242 | const char *message) { |
  __kmp_push_num_threads(loc, global_tid, num_threads);
  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
245 | } |
246 | |
247 | /*! |
248 | @ingroup PARALLEL |
249 | @param loc source location information |
250 | @param global_tid global thread number |
251 | @param list_length number of entries in the num_threads_list array |
252 | @param num_threads_list array of numbers of threads requested for this parallel |
253 | construct and subsequent nested parallel constructs |
254 | |
255 | Set the number of threads to be used by the next fork spawned by this thread, |
256 | and some nested forks as well. |
257 | This call is only required if the parallel construct has a `num_threads` clause |
258 | that has a list of integers as the argument. |
259 | */ |
260 | void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid, |
261 | kmp_uint32 list_length, |
262 | kmp_int32 *num_threads_list) { |
263 | KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=" , |
264 | global_tid)); |
265 | KA_TRACE(20, ("%d" , num_threads_list[0])); |
266 | #ifdef KMP_DEBUG |
267 | for (kmp_uint32 i = 1; i < list_length; ++i) |
268 | KA_TRACE(20, (", %d" , num_threads_list[i])); |
269 | #endif |
  KA_TRACE(20, ("\n"));
271 | |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
274 | } |
275 | |
276 | void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid, |
277 | kmp_uint32 list_length, |
278 | kmp_int32 *num_threads_list, |
279 | int severity, const char *message) { |
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
282 | } |
283 | |
284 | void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { |
285 | KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n" )); |
286 | /* the num_threads are automatically popped */ |
287 | } |
288 | |
289 | void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, |
290 | kmp_int32 proc_bind) { |
291 | KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n" , global_tid, |
292 | proc_bind)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
295 | } |
296 | |
297 | /*! |
298 | @ingroup PARALLEL |
299 | @param loc source location information |
300 | @param argc total number of arguments in the ellipsis |
301 | @param microtask pointer to callback routine consisting of outlined parallel |
302 | construct |
303 | @param ... pointers to shared variables that aren't global |
304 | |
305 | Do the actual fork and call the microtask in the relevant number of threads. |
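
As an illustration (a hedged sketch, not code emitted from this file): the
compiler outlines the region body into a microtask whose first two parameters
are the global and bound thread ids, and passes shared variables through the
ellipsis. The names below are hypothetical.
@code
void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
  (*x)++; // body of '#pragma omp parallel' sharing x
}
// emitted call site, one vararg per shared variable:
int x = 0;
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_body, &x);
@endcode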
306 | */ |
307 | void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { |
308 | int gtid = __kmp_entry_gtid(); |
309 | |
310 | #if (KMP_STATS_ENABLED) |
311 | // If we were in a serial region, then stop the serial timer, record |
312 | // the event, and start parallel region timer |
313 | stats_state_e previous_state = KMP_GET_THREAD_STATE(); |
314 | if (previous_state == stats_state_e::SERIAL_REGION) { |
315 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); |
316 | } else { |
317 | KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); |
318 | } |
319 | int inParallel = __kmpc_in_parallel(loc); |
320 | if (inParallel) { |
321 | KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); |
322 | } else { |
323 | KMP_COUNT_BLOCK(OMP_PARALLEL); |
324 | } |
325 | #endif |
326 | |
  // maybe saving thr_state is enough here
328 | { |
329 | va_list ap; |
330 | va_start(ap, microtask); |
331 | |
332 | #if OMPT_SUPPORT |
333 | ompt_frame_t *ompt_frame; |
334 | if (ompt_enabled.enabled) { |
335 | kmp_info_t *master_th = __kmp_threads[gtid]; |
336 | ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame; |
337 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
338 | } |
339 | OMPT_STORE_RETURN_ADDRESS(gtid); |
340 | #endif |
341 | |
342 | #if INCLUDE_SSC_MARKS |
343 | SSC_MARK_FORKING(); |
344 | #endif |
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
346 | VOLATILE_CAST(microtask_t) microtask, // "wrapped" task |
347 | VOLATILE_CAST(launch_t) __kmp_invoke_task_func, |
348 | kmp_va_addr_of(ap)); |
349 | #if INCLUDE_SSC_MARKS |
350 | SSC_MARK_JOINING(); |
351 | #endif |
352 | __kmp_join_call(loc, gtid |
353 | #if OMPT_SUPPORT |
354 | , |
                    fork_context_intel
356 | #endif |
357 | ); |
358 | |
359 | va_end(ap); |
360 | |
361 | #if OMPT_SUPPORT |
362 | if (ompt_enabled.enabled) { |
363 | ompt_frame->enter_frame = ompt_data_none; |
364 | } |
365 | #endif |
366 | } |
367 | |
368 | #if KMP_STATS_ENABLED |
369 | if (previous_state == stats_state_e::SERIAL_REGION) { |
370 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); |
371 | KMP_SET_THREAD_STATE(previous_state); |
372 | } else { |
373 | KMP_POP_PARTITIONED_TIMER(); |
374 | } |
375 | #endif // KMP_STATS_ENABLED |
376 | } |
377 | |
378 | /*! |
379 | @ingroup PARALLEL |
380 | @param loc source location information |
381 | @param microtask pointer to callback routine consisting of outlined parallel |
382 | construct |
383 | @param cond condition for running in parallel |
384 | @param args struct of pointers to shared variables that aren't global |
385 | |
386 | Perform a fork only if the condition is true. |
387 | */ |
388 | void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, |
389 | kmp_int32 cond, void *args) { |
390 | int gtid = __kmp_entry_gtid(); |
391 | if (cond) { |
392 | if (args) |
393 | __kmpc_fork_call(loc, argc, microtask, args); |
394 | else |
395 | __kmpc_fork_call(loc, argc, microtask); |
396 | } else { |
    __kmpc_serialized_parallel(loc, gtid);
398 | |
399 | #if OMPT_SUPPORT |
400 | void *exit_frame_ptr; |
401 | #endif |
402 | |
403 | if (args) |
404 | __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, |
405 | /*npr=*/0, |
                             /*argc=*/1, &args
407 | #if OMPT_SUPPORT |
408 | , |
                             &exit_frame_ptr
410 | #endif |
411 | ); |
412 | else |
413 | __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid, |
414 | /*npr=*/0, |
415 | /*argc=*/0, |
                             /*args=*/nullptr
417 | #if OMPT_SUPPORT |
418 | , |
                             &exit_frame_ptr
420 | #endif |
421 | ); |
422 | |
    __kmpc_end_serialized_parallel(loc, gtid);
424 | } |
425 | } |
426 | |
427 | /*! |
428 | @ingroup PARALLEL |
429 | @param loc source location information |
430 | @param global_tid global thread number |
431 | @param num_teams number of teams requested for the teams construct |
432 | @param num_threads number of threads per team requested for the teams construct |
433 | |
434 | Set the number of teams to be used by the teams construct. |
435 | This call is only required if the teams construct has a `num_teams` clause |
436 | or a `thread_limit` clause (or both). |
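
A hedged sketch of the typical lowering (names are illustrative only):
@code
// #pragma omp teams num_teams(4) thread_limit(8)
__kmpc_push_num_teams(&loc, gtid, 4, 8);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_body);
@endcode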
437 | */ |
438 | void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, |
439 | kmp_int32 num_teams, kmp_int32 num_threads) { |
440 | KA_TRACE(20, |
441 | ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n" , |
442 | global_tid, num_teams, num_threads)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
445 | } |
446 | |
447 | /*! |
448 | @ingroup PARALLEL |
449 | @param loc source location information |
450 | @param global_tid global thread number |
451 | @param thread_limit limit on number of threads which can be created within the |
452 | current task |
453 | |
Set the thread_limit for the current task.
This call is there to support the `thread_limit` clause on the `target` construct.
456 | */ |
457 | void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, |
458 | kmp_int32 thread_limit) { |
  __kmp_assert_valid_gtid(global_tid);
460 | kmp_info_t *thread = __kmp_threads[global_tid]; |
461 | if (thread_limit > 0) |
462 | thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit; |
463 | } |
464 | |
465 | /*! |
466 | @ingroup PARALLEL |
467 | @param loc source location information |
468 | @param global_tid global thread number |
469 | @param num_teams_lb lower bound on number of teams requested for the teams |
470 | construct |
471 | @param num_teams_ub upper bound on number of teams requested for the teams |
472 | construct |
473 | @param num_threads number of threads per team requested for the teams construct |
474 | |
475 | Set the number of teams to be used by the teams construct. The number of initial |
teams created will be greater than or equal to the lower bound and less than or
477 | equal to the upper bound. |
478 | This call is only required if the teams construct has a `num_teams` clause |
479 | or a `thread_limit` clause (or both). |
480 | */ |
481 | void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, |
482 | kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, |
483 | kmp_int32 num_threads) { |
484 | KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" |
485 | " num_teams_ub=%d num_threads=%d\n" , |
486 | global_tid, num_teams_lb, num_teams_ub, num_threads)); |
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
489 | num_threads); |
490 | } |
491 | |
492 | /*! |
493 | @ingroup PARALLEL |
494 | @param loc source location information |
495 | @param argc total number of arguments in the ellipsis |
496 | @param microtask pointer to callback routine consisting of outlined teams |
497 | construct |
498 | @param ... pointers to shared variables that aren't global |
499 | |
500 | Do the actual fork and call the microtask in the relevant number of threads. |
501 | */ |
502 | void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, |
503 | ...) { |
504 | int gtid = __kmp_entry_gtid(); |
505 | kmp_info_t *this_thr = __kmp_threads[gtid]; |
506 | va_list ap; |
507 | va_start(ap, microtask); |
508 | |
509 | #if KMP_STATS_ENABLED |
510 | KMP_COUNT_BLOCK(OMP_TEAMS); |
511 | stats_state_e previous_state = KMP_GET_THREAD_STATE(); |
512 | if (previous_state == stats_state_e::SERIAL_REGION) { |
513 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); |
514 | } else { |
515 | KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); |
516 | } |
517 | #endif |
518 | |
519 | // remember teams entry point and nesting level |
520 | this_thr->th.th_teams_microtask = microtask; |
521 | this_thr->th.th_teams_level = |
522 | this_thr->th.th_team->t.t_level; // AC: can be >0 on host |
523 | |
524 | #if OMPT_SUPPORT |
525 | kmp_team_t *parent_team = this_thr->th.th_team; |
526 | int tid = __kmp_tid_from_gtid(gtid); |
527 | if (ompt_enabled.enabled) { |
528 | parent_team->t.t_implicit_task_taskdata[tid] |
529 | .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
530 | } |
531 | OMPT_STORE_RETURN_ADDRESS(gtid); |
532 | #endif |
533 | |
534 | // check if __kmpc_push_num_teams called, set default number of teams |
535 | // otherwise |
536 | if (this_thr->th.th_teams_size.nteams == 0) { |
    __kmp_push_num_teams(loc, gtid, 0, 0);
538 | } |
539 | KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); |
540 | KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); |
541 | KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); |
542 | |
543 | __kmp_fork_call( |
      loc, gtid, fork_context_intel, argc,
545 | VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task |
546 | VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); |
547 | __kmp_join_call(loc, gtid |
548 | #if OMPT_SUPPORT |
549 | , |
                  fork_context_intel
551 | #endif |
552 | ); |
553 | |
554 | // Pop current CG root off list |
555 | KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); |
556 | kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; |
557 | this_thr->th.th_cg_roots = tmp->up; |
558 | KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" |
559 | " to node %p. cg_nthreads was %d\n" , |
560 | this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); |
561 | KMP_DEBUG_ASSERT(tmp->cg_nthreads); |
562 | int i = tmp->cg_nthreads--; |
  if (i == 1) { // check if we are the last thread in CG (not always the case)
564 | __kmp_free(tmp); |
565 | } |
566 | // Restore current task's thread_limit from CG root |
567 | KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); |
568 | this_thr->th.th_current_task->td_icvs.thread_limit = |
569 | this_thr->th.th_cg_roots->cg_thread_limit; |
570 | |
571 | this_thr->th.th_teams_microtask = NULL; |
572 | this_thr->th.th_teams_level = 0; |
573 | *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; |
574 | va_end(ap); |
575 | #if KMP_STATS_ENABLED |
576 | if (previous_state == stats_state_e::SERIAL_REGION) { |
577 | KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); |
578 | KMP_SET_THREAD_STATE(previous_state); |
579 | } else { |
580 | KMP_POP_PARTITIONED_TIMER(); |
581 | } |
582 | #endif // KMP_STATS_ENABLED |
583 | } |
584 | |
585 | // I don't think this function should ever have been exported. |
586 | // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated |
587 | // openmp code ever called it, but it's been exported from the RTL for so |
588 | // long that I'm afraid to remove the definition. |
589 | int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } |
590 | |
591 | /*! |
592 | @ingroup PARALLEL |
593 | @param loc source location information |
594 | @param global_tid global thread number |
595 | |
596 | Enter a serialized parallel construct. This interface is used to handle a |
597 | conditional parallel region, like this, |
598 | @code |
599 | #pragma omp parallel if (condition) |
600 | @endcode |
601 | when the condition is false. |
602 | */ |
603 | void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { |
604 | // The implementation is now in kmp_runtime.cpp so that it can share static |
605 | // functions with kmp_fork_call since the tasks to be done are similar in |
606 | // each case. |
  __kmp_assert_valid_gtid(global_tid);
608 | #if OMPT_SUPPORT |
609 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
610 | #endif |
  __kmp_serialized_parallel(loc, global_tid);
612 | } |
613 | |
614 | /*! |
615 | @ingroup PARALLEL |
616 | @param loc source location information |
617 | @param global_tid global thread number |
618 | |
619 | Leave a serialized parallel construct. |
620 | */ |
621 | void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { |
622 | kmp_internal_control_t *top; |
623 | kmp_info_t *this_thr; |
624 | kmp_team_t *serial_team; |
625 | |
626 | KC_TRACE(10, |
627 | ("__kmpc_end_serialized_parallel: called by T#%d\n" , global_tid)); |
628 | |
629 | /* skip all this code for autopar serialized loops since it results in |
630 | unacceptable overhead */ |
631 | if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) |
632 | return; |
633 | |
634 | // Not autopar code |
  __kmp_assert_valid_gtid(global_tid);
636 | if (!TCR_4(__kmp_init_parallel)) |
637 | __kmp_parallel_initialize(); |
638 | |
639 | __kmp_resume_if_soft_paused(); |
640 | |
641 | this_thr = __kmp_threads[global_tid]; |
642 | serial_team = this_thr->th.th_serial_team; |
643 | |
644 | kmp_task_team_t *task_team = this_thr->th.th_task_team; |
645 | // we need to wait for the proxy tasks before finishing the thread |
646 | if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || |
647 | task_team->tt.tt_hidden_helper_task_encountered)) |
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
649 | |
650 | KMP_MB(); |
651 | KMP_DEBUG_ASSERT(serial_team); |
652 | KMP_ASSERT(serial_team->t.t_serialized); |
653 | KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); |
654 | KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); |
655 | KMP_DEBUG_ASSERT(serial_team->t.t_threads); |
656 | KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); |
657 | |
658 | #if OMPT_SUPPORT |
659 | if (ompt_enabled.enabled && |
660 | this_thr->th.ompt_thread_info.state != ompt_state_overhead) { |
661 | OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; |
662 | if (ompt_enabled.ompt_callback_implicit_task) { |
663 | ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( |
664 | ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, |
665 | OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); |
666 | } |
667 | |
    // reset (clear) the task id only after unlinking the task
669 | ompt_data_t *parent_task_data; |
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
671 | |
672 | if (ompt_enabled.ompt_callback_parallel_end) { |
673 | ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( |
674 | &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, |
675 | ompt_parallel_invoker_program | ompt_parallel_team, |
676 | OMPT_LOAD_RETURN_ADDRESS(global_tid)); |
677 | } |
    __ompt_lw_taskteam_unlink(this_thr);
679 | this_thr->th.ompt_thread_info.state = ompt_state_overhead; |
680 | } |
681 | #endif |
682 | |
683 | /* If necessary, pop the internal control stack values and replace the team |
684 | * values */ |
685 | top = serial_team->t.t_control_stack_top; |
686 | if (top && top->serial_nesting_level == serial_team->t.t_serialized) { |
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
688 | serial_team->t.t_control_stack_top = top->next; |
689 | __kmp_free(top); |
690 | } |
691 | |
692 | /* pop dispatch buffers stack */ |
693 | KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); |
694 | { |
695 | dispatch_private_info_t *disp_buffer = |
696 | serial_team->t.t_dispatch->th_disp_buffer; |
697 | serial_team->t.t_dispatch->th_disp_buffer = |
698 | serial_team->t.t_dispatch->th_disp_buffer->next; |
699 | __kmp_free(disp_buffer); |
700 | } |
701 | |
702 | /* pop the task team stack */ |
703 | if (serial_team->t.t_serialized > 1) { |
    __kmp_pop_task_team_node(this_thr, serial_team);
705 | } |
706 | |
707 | this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore |
708 | |
709 | --serial_team->t.t_serialized; |
710 | if (serial_team->t.t_serialized == 0) { |
711 | |
712 | /* return to the parallel section */ |
713 | |
714 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
715 | if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { |
716 | __kmp_clear_x87_fpu_status_word(); |
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
719 | } |
720 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
721 | |
722 | __kmp_pop_current_task_from_thread(this_thr); |
723 | #if OMPD_SUPPORT |
724 | if (ompd_state & OMPD_ENABLE_BP) |
725 | ompd_bp_parallel_end(); |
726 | #endif |
727 | |
728 | this_thr->th.th_team = serial_team->t.t_parent; |
729 | this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; |
730 | |
731 | /* restore values cached in the thread */ |
732 | this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ |
733 | this_thr->th.th_team_master = |
734 | serial_team->t.t_parent->t.t_threads[0]; /* JPH */ |
735 | this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; |
736 | |
737 | /* TODO the below shouldn't need to be adjusted for serialized teams */ |
738 | this_thr->th.th_dispatch = |
739 | &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; |
740 | |
741 | KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); |
742 | this_thr->th.th_current_task->td_flags.executing = 1; |
743 | |
744 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
745 | // Restore task state from serial team structure |
746 | KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 || |
747 | serial_team->t.t_primary_task_state == 1); |
748 | this_thr->th.th_task_state = |
749 | (kmp_uint8)serial_team->t.t_primary_task_state; |
750 | // Copy the task team from the new child / old parent team to the thread. |
751 | this_thr->th.th_task_team = |
752 | this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; |
753 | KA_TRACE(20, |
754 | ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " |
755 | "team %p\n" , |
756 | global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); |
757 | } |
758 | #if KMP_AFFINITY_SUPPORTED |
759 | if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) { |
      __kmp_reset_root_init_mask(global_tid);
761 | } |
762 | #endif |
763 | } else { |
764 | if (__kmp_tasking_mode != tskm_immediate_exec) { |
765 | KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " |
766 | "depth of serial team %p to %d\n" , |
767 | global_tid, serial_team, serial_team->t.t_serialized)); |
768 | } |
769 | } |
770 | |
771 | serial_team->t.t_level--; |
772 | if (__kmp_env_consistency_check) |
    __kmp_pop_parallel(global_tid, NULL);
774 | #if OMPT_SUPPORT |
775 | if (ompt_enabled.enabled) |
776 | this_thr->th.ompt_thread_info.state = |
777 | ((this_thr->th.th_team_serialized) ? ompt_state_work_serial |
778 | : ompt_state_work_parallel); |
779 | #endif |
780 | } |
781 | |
782 | /*! |
783 | @ingroup SYNCHRONIZATION |
784 | @param loc source location information. |
785 | |
786 | Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though |
787 | depending on the memory ordering convention obeyed by the compiler |
788 | even that may not be necessary). |
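
An illustrative lowering (hedged sketch; `loc` is the compiler-generated
source-location descriptor):
@code
// #pragma omp flush
__kmpc_flush(&loc);
@endcode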
789 | */ |
790 | void __kmpc_flush(ident_t *loc) { |
791 | KC_TRACE(10, ("__kmpc_flush: called\n" )); |
792 | |
  /* need explicit __mf() here since we use volatile instead in the library */
794 | KMP_MFENCE(); /* Flush all pending memory write invalidates. */ |
795 | |
796 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
797 | if (ompt_enabled.ompt_callback_flush) { |
798 | ompt_callbacks.ompt_callback(ompt_callback_flush)( |
799 | __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); |
800 | } |
801 | #endif |
802 | } |
803 | |
804 | /* -------------------------------------------------------------------------- */ |
805 | /*! |
806 | @ingroup SYNCHRONIZATION |
807 | @param loc source location information |
808 | @param global_tid thread id. |
809 | |
810 | Execute a barrier. |
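
An illustrative lowering (hedged sketch; a compiler will normally reuse a
cached global thread id rather than requery it):
@code
// #pragma omp barrier
__kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
@endcode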
811 | */ |
812 | void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { |
813 | KMP_COUNT_BLOCK(OMP_BARRIER); |
814 | KC_TRACE(10, ("__kmpc_barrier: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
816 | |
817 | if (!TCR_4(__kmp_init_parallel)) |
818 | __kmp_parallel_initialize(); |
819 | |
820 | __kmp_resume_if_soft_paused(); |
821 | |
822 | if (__kmp_env_consistency_check) { |
823 | if (loc == 0) { |
824 | KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? |
825 | } |
    __kmp_check_barrier(global_tid, ct_barrier, loc);
827 | } |
828 | |
829 | #if OMPT_SUPPORT |
830 | ompt_frame_t *ompt_frame; |
831 | if (ompt_enabled.enabled) { |
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
833 | if (ompt_frame->enter_frame.ptr == NULL) |
834 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
835 | } |
836 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
837 | #endif |
838 | __kmp_threads[global_tid]->th.th_ident = loc; |
839 | // TODO: explicit barrier_wait_id: |
840 | // this function is called when 'barrier' directive is present or |
841 | // implicit barrier at the end of a worksharing construct. |
842 | // 1) better to add a per-thread barrier counter to a thread data structure |
843 | // 2) set to 0 when a new team is created |
844 | // 4) no sync is required |
845 | |
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
847 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
848 | if (ompt_enabled.enabled) { |
849 | ompt_frame->enter_frame = ompt_data_none; |
850 | } |
851 | #endif |
852 | } |
853 | |
854 | /* The BARRIER for a MASTER section is always explicit */ |
855 | /*! |
856 | @ingroup WORK_SHARING |
857 | @param loc source location information. |
@param global_tid global thread number.
859 | @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. |
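
A hedged sketch of the usual lowering of a <tt>master</tt> region:
@code
if (__kmpc_master(&loc, gtid)) {
  body(); // executed by the primary thread only
  __kmpc_end_master(&loc, gtid);
}
@endcode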
860 | */ |
861 | kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { |
862 | int status = 0; |
863 | |
864 | KC_TRACE(10, ("__kmpc_master: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
866 | |
867 | if (!TCR_4(__kmp_init_parallel)) |
868 | __kmp_parallel_initialize(); |
869 | |
870 | __kmp_resume_if_soft_paused(); |
871 | |
872 | if (KMP_MASTER_GTID(global_tid)) { |
873 | KMP_COUNT_BLOCK(OMP_MASTER); |
874 | KMP_PUSH_PARTITIONED_TIMER(OMP_master); |
875 | status = 1; |
876 | } |
877 | |
878 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
879 | if (status) { |
880 | if (ompt_enabled.ompt_callback_masked) { |
881 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
882 | kmp_team_t *team = this_thr->th.th_team; |
883 | |
      int tid = __kmp_tid_from_gtid(global_tid);
885 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
886 | ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), |
887 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
888 | OMPT_GET_RETURN_ADDRESS(0)); |
889 | } |
890 | } |
891 | #endif |
892 | |
893 | if (__kmp_env_consistency_check) { |
894 | #if KMP_USE_DYNAMIC_LOCK |
895 | if (status) |
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
899 | #else |
900 | if (status) |
901 | __kmp_push_sync(global_tid, ct_master, loc, NULL); |
902 | else |
903 | __kmp_check_sync(global_tid, ct_master, loc, NULL); |
904 | #endif |
905 | } |
906 | |
907 | return status; |
908 | } |
909 | |
910 | /*! |
911 | @ingroup WORK_SHARING |
912 | @param loc source location information. |
@param global_tid global thread number.
914 | |
915 | Mark the end of a <tt>master</tt> region. This should only be called by the |
916 | thread that executes the <tt>master</tt> region. |
917 | */ |
918 | void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { |
919 | KC_TRACE(10, ("__kmpc_end_master: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
921 | KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); |
922 | KMP_POP_PARTITIONED_TIMER(); |
923 | |
924 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
925 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
926 | kmp_team_t *team = this_thr->th.th_team; |
927 | if (ompt_enabled.ompt_callback_masked) { |
    int tid = __kmp_tid_from_gtid(global_tid);
929 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
930 | ompt_scope_end, &(team->t.ompt_team_info.parallel_data), |
931 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
932 | OMPT_GET_RETURN_ADDRESS(0)); |
933 | } |
934 | #endif |
935 | |
936 | if (__kmp_env_consistency_check) { |
937 | if (KMP_MASTER_GTID(global_tid)) |
      __kmp_pop_sync(global_tid, ct_master, loc);
939 | } |
940 | } |
941 | |
942 | /*! |
943 | @ingroup WORK_SHARING |
944 | @param loc source location information. |
945 | @param global_tid global thread number. |
946 | @param filter result of evaluating filter clause on thread global_tid, or zero |
947 | if no filter clause present |
948 | @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. |
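
A hedged sketch of the usual lowering of <tt>masked filter(2)</tt>, where the
filter expression has already been evaluated to 2:
@code
if (__kmpc_masked(&loc, gtid, 2)) {
  body(); // executed only by the thread whose team-local id is 2
  __kmpc_end_masked(&loc, gtid);
}
@endcode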
949 | */ |
950 | kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { |
951 | int status = 0; |
952 | int tid; |
953 | KC_TRACE(10, ("__kmpc_masked: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
955 | |
956 | if (!TCR_4(__kmp_init_parallel)) |
957 | __kmp_parallel_initialize(); |
958 | |
959 | __kmp_resume_if_soft_paused(); |
960 | |
  tid = __kmp_tid_from_gtid(global_tid);
962 | if (tid == filter) { |
963 | KMP_COUNT_BLOCK(OMP_MASKED); |
964 | KMP_PUSH_PARTITIONED_TIMER(OMP_masked); |
965 | status = 1; |
966 | } |
967 | |
968 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
969 | if (status) { |
970 | if (ompt_enabled.ompt_callback_masked) { |
971 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
972 | kmp_team_t *team = this_thr->th.th_team; |
973 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
974 | ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), |
975 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
976 | OMPT_GET_RETURN_ADDRESS(0)); |
977 | } |
978 | } |
979 | #endif |
980 | |
981 | if (__kmp_env_consistency_check) { |
982 | #if KMP_USE_DYNAMIC_LOCK |
983 | if (status) |
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
987 | #else |
988 | if (status) |
989 | __kmp_push_sync(global_tid, ct_masked, loc, NULL); |
990 | else |
991 | __kmp_check_sync(global_tid, ct_masked, loc, NULL); |
992 | #endif |
993 | } |
994 | |
995 | return status; |
996 | } |
997 | |
998 | /*! |
999 | @ingroup WORK_SHARING |
1000 | @param loc source location information. |
@param global_tid global thread number.
1002 | |
1003 | Mark the end of a <tt>masked</tt> region. This should only be called by the |
1004 | thread that executes the <tt>masked</tt> region. |
1005 | */ |
1006 | void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { |
1007 | KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
1009 | KMP_POP_PARTITIONED_TIMER(); |
1010 | |
1011 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1012 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
1013 | kmp_team_t *team = this_thr->th.th_team; |
1014 | if (ompt_enabled.ompt_callback_masked) { |
    int tid = __kmp_tid_from_gtid(global_tid);
1016 | ompt_callbacks.ompt_callback(ompt_callback_masked)( |
1017 | ompt_scope_end, &(team->t.ompt_team_info.parallel_data), |
1018 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
1019 | OMPT_GET_RETURN_ADDRESS(0)); |
1020 | } |
1021 | #endif |
1022 | |
1023 | if (__kmp_env_consistency_check) { |
    __kmp_pop_sync(global_tid, ct_masked, loc);
1025 | } |
1026 | } |
1027 | |
1028 | /*! |
1029 | @ingroup WORK_SHARING |
1030 | @param loc source location information. |
1031 | @param gtid global thread number. |
1032 | |
1033 | Start execution of an <tt>ordered</tt> construct. |
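
A hedged sketch: inside a loop worksharing construct with an <tt>ordered</tt>
clause, each <tt>ordered</tt> block in the loop body is bracketed by these
calls, which serialize it in iteration order.
@code
__kmpc_ordered(&loc, gtid);
ordered_body(i); // hypothetical body that must run in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode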
1034 | */ |
1035 | void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { |
1036 | int cid = 0; |
1037 | kmp_info_t *th; |
1038 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
1039 | |
1040 | KC_TRACE(10, ("__kmpc_ordered: called T#%d\n" , gtid)); |
1041 | __kmp_assert_valid_gtid(gtid); |
1042 | |
1043 | if (!TCR_4(__kmp_init_parallel)) |
1044 | __kmp_parallel_initialize(); |
1045 | |
1046 | __kmp_resume_if_soft_paused(); |
1047 | |
1048 | #if USE_ITT_BUILD |
1049 | __kmp_itt_ordered_prep(gtid); |
1050 | // TODO: ordered_wait_id |
1051 | #endif /* USE_ITT_BUILD */ |
1052 | |
1053 | th = __kmp_threads[gtid]; |
1054 | |
1055 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1056 | kmp_team_t *team; |
1057 | ompt_wait_id_t lck; |
1058 | void *codeptr_ra; |
1059 | OMPT_STORE_RETURN_ADDRESS(gtid); |
1060 | if (ompt_enabled.enabled) { |
1061 | team = __kmp_team_from_gtid(gtid); |
1062 | lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; |
1063 | /* OMPT state update */ |
1064 | th->th.ompt_thread_info.wait_id = lck; |
1065 | th->th.ompt_thread_info.state = ompt_state_wait_ordered; |
1066 | |
1067 | /* OMPT event callback */ |
1068 | codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); |
1069 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1070 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1071 | ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck, |
1072 | codeptr_ra); |
1073 | } |
1074 | } |
1075 | #endif |
1076 | |
1077 | if (th->th.th_dispatch->th_deo_fcn != 0) |
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);
1081 | |
1082 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1083 | if (ompt_enabled.enabled) { |
1084 | /* OMPT state update */ |
1085 | th->th.ompt_thread_info.state = ompt_state_work_parallel; |
1086 | th->th.ompt_thread_info.wait_id = 0; |
1087 | |
1088 | /* OMPT event callback */ |
1089 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
1090 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
1091 | ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); |
1092 | } |
1093 | } |
1094 | #endif |
1095 | |
1096 | #if USE_ITT_BUILD |
1097 | __kmp_itt_ordered_start(gtid); |
1098 | #endif /* USE_ITT_BUILD */ |
1099 | } |
1100 | |
1101 | /*! |
1102 | @ingroup WORK_SHARING |
1103 | @param loc source location information. |
1104 | @param gtid global thread number. |
1105 | |
1106 | End execution of an <tt>ordered</tt> construct. |
1107 | */ |
1108 | void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { |
1109 | int cid = 0; |
1110 | kmp_info_t *th; |
1111 | |
1112 | KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n" , gtid)); |
1113 | __kmp_assert_valid_gtid(gtid); |
1114 | |
1115 | #if USE_ITT_BUILD |
1116 | __kmp_itt_ordered_end(gtid); |
1117 | // TODO: ordered_wait_id |
1118 | #endif /* USE_ITT_BUILD */ |
1119 | |
1120 | th = __kmp_threads[gtid]; |
1121 | |
1122 | if (th->th.th_dispatch->th_dxo_fcn != 0) |
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);
1126 | |
1127 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1128 | OMPT_STORE_RETURN_ADDRESS(gtid); |
1129 | if (ompt_enabled.ompt_callback_mutex_released) { |
1130 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
1131 | ompt_mutex_ordered, |
1132 | (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid) |
1133 | ->t.t_ordered.dt.t_value, |
1134 | OMPT_LOAD_RETURN_ADDRESS(gtid)); |
1135 | } |
1136 | #endif |
1137 | } |
1138 | |
1139 | #if KMP_USE_DYNAMIC_LOCK |
1140 | |
1141 | static __forceinline void |
1142 | __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, |
1143 | kmp_int32 gtid, kmp_indirect_locktag_t tag) { |
1144 | // Pointer to the allocated indirect lock is written to crit, while indexing |
1145 | // is ignored. |
1146 | void *idx; |
1147 | kmp_indirect_lock_t **lck; |
1148 | lck = (kmp_indirect_lock_t **)crit; |
1149 | kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); |
1150 | KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); |
1151 | KMP_SET_I_LOCK_LOCATION(ilk, loc); |
1152 | KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); |
1153 | KA_TRACE(20, |
1154 | ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n" , tag)); |
1155 | #if USE_ITT_BUILD |
  __kmp_itt_critical_creating(ilk->lock, loc);
1157 | #endif |
1158 | int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); |
1159 | if (status == 0) { |
1160 | #if USE_ITT_BUILD |
    __kmp_itt_critical_destroyed(ilk->lock);
1162 | #endif |
1163 | // We don't really need to destroy the unclaimed lock here since it will be |
1164 | // cleaned up at program exit. |
1165 | // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); |
1166 | } |
1167 | KMP_DEBUG_ASSERT(*lck != NULL); |
1168 | } |
1169 | |
1170 | // Fast-path acquire tas lock |
1171 | #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ |
1172 | { \ |
1173 | kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ |
1174 | kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ |
1175 | kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ |
1176 | if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ |
1177 | !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ |
1178 | kmp_uint32 spins; \ |
1179 | KMP_FSYNC_PREPARE(l); \ |
1180 | KMP_INIT_YIELD(spins); \ |
1181 | kmp_backoff_t backoff = __kmp_spin_backoff_params; \ |
1182 | do { \ |
1183 | if (TCR_4(__kmp_nth) > \ |
1184 | (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ |
1185 | KMP_YIELD(TRUE); \ |
1186 | } else { \ |
1187 | KMP_YIELD_SPIN(spins); \ |
1188 | } \ |
1189 | __kmp_spin_backoff(&backoff); \ |
1190 | } while ( \ |
1191 | KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ |
1192 | !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ |
1193 | } \ |
1194 | KMP_FSYNC_ACQUIRED(l); \ |
1195 | } |
1196 | |
1197 | // Fast-path test tas lock |
1198 | #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ |
1199 | { \ |
1200 | kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ |
1201 | kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ |
1202 | kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ |
1203 | rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ |
1204 | __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ |
1205 | } |
1206 | |
1207 | // Fast-path release tas lock |
1208 | #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ |
1209 | { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } |
1210 | |
1211 | #if KMP_USE_FUTEX |
1212 | |
1213 | #include <sys/syscall.h> |
1214 | #include <unistd.h> |
1215 | #ifndef FUTEX_WAIT |
1216 | #define FUTEX_WAIT 0 |
1217 | #endif |
1218 | #ifndef FUTEX_WAKE |
1219 | #define FUTEX_WAKE 1 |
1220 | #endif |
1221 | |
1222 | // Fast-path acquire futex lock |
1223 | #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ |
1224 | { \ |
1225 | kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ |
1226 | kmp_int32 gtid_code = (gtid + 1) << 1; \ |
1227 | KMP_MB(); \ |
1228 | KMP_FSYNC_PREPARE(ftx); \ |
1229 | kmp_int32 poll_val; \ |
1230 | while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ |
1231 | &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ |
1232 | KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ |
1233 | kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ |
1234 | if (!cond) { \ |
1235 | if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ |
1236 | poll_val | \ |
1237 | KMP_LOCK_BUSY(1, futex))) { \ |
1238 | continue; \ |
1239 | } \ |
1240 | poll_val |= KMP_LOCK_BUSY(1, futex); \ |
1241 | } \ |
1242 | kmp_int32 rc; \ |
1243 | if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ |
1244 | NULL, NULL, 0)) != 0) { \ |
1245 | continue; \ |
1246 | } \ |
1247 | gtid_code |= 1; \ |
1248 | } \ |
1249 | KMP_FSYNC_ACQUIRED(ftx); \ |
1250 | } |
1251 | |
1252 | // Fast-path test futex lock |
1253 | #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ |
1254 | { \ |
1255 | kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ |
1256 | if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ |
1257 | KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ |
1258 | KMP_FSYNC_ACQUIRED(ftx); \ |
1259 | rc = TRUE; \ |
1260 | } else { \ |
1261 | rc = FALSE; \ |
1262 | } \ |
1263 | } |
1264 | |
1265 | // Fast-path release futex lock |
1266 | #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ |
1267 | { \ |
1268 | kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ |
1269 | KMP_MB(); \ |
1270 | KMP_FSYNC_RELEASING(ftx); \ |
1271 | kmp_int32 poll_val = \ |
1272 | KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ |
1273 | if (KMP_LOCK_STRIP(poll_val) & 1) { \ |
1274 | syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ |
1275 | KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ |
1276 | } \ |
1277 | KMP_MB(); \ |
1278 | KMP_YIELD_OVERSUB(); \ |
1279 | } |
1280 | |
1281 | #endif // KMP_USE_FUTEX |
1282 | |
1283 | #else // KMP_USE_DYNAMIC_LOCK |
1284 | |
1285 | static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, |
1286 | ident_t const *loc, |
1287 | kmp_int32 gtid) { |
1288 | kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; |
1289 | |
1290 | // Because of the double-check, the following load doesn't need to be volatile |
1291 | kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); |
1292 | |
1293 | if (lck == NULL) { |
1294 | void *idx; |
1295 | |
1296 | // Allocate & initialize the lock. |
1297 | // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() |
1298 | lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); |
1299 | __kmp_init_user_lock_with_checks(lck); |
1300 | __kmp_set_user_lock_location(lck, loc); |
1301 | #if USE_ITT_BUILD |
1302 | __kmp_itt_critical_creating(lck); |
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed without ever being used, but
// that is not a problem, because this is not a real event seen by the user;
// it merely sets a name for the object (lock). See more details in kmp_itt.h.
1308 | #endif /* USE_ITT_BUILD */ |
1309 | |
1310 | // Use a cmpxchg instruction to slam the start of the critical section with |
1311 | // the lock pointer. If another thread beat us to it, deallocate the lock, |
1312 | // and use the lock that the other thread allocated. |
1313 | int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); |
1314 | |
1315 | if (status == 0) { |
1316 | // Deallocate the lock and reload the value. |
1317 | #if USE_ITT_BUILD |
1318 | __kmp_itt_critical_destroyed(lck); |
1319 | // Let ITT know the lock is destroyed and the same memory location may be reused |
1320 | // for another purpose. |
1321 | #endif /* USE_ITT_BUILD */ |
1322 | __kmp_destroy_user_lock_with_checks(lck); |
1323 | __kmp_user_lock_free(&idx, gtid, lck); |
1324 | lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); |
1325 | KMP_DEBUG_ASSERT(lck != NULL); |
1326 | } |
1327 | } |
1328 | return lck; |
1329 | } |
1330 | |
1331 | #endif // KMP_USE_DYNAMIC_LOCK |
1332 | |
1333 | /*! |
1334 | @ingroup WORK_SHARING |
1335 | @param loc source location information. |
1336 | @param global_tid global thread number. |
1337 | @param crit identity of the critical section. This could be a pointer to a lock |
1338 | associated with the critical section, or some other suitably unique value. |
1339 | |
1340 | Enter code protected by a `critical` construct. |
1341 | This function blocks until the executing thread can enter the critical section. |
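
A hedged sketch of the usual lowering, assuming the compiler emits one
zero-initialized kmp_critical_name object per named critical section:
@code
static kmp_critical_name crit_name;
__kmpc_critical(&loc, gtid, &crit_name);
body();
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode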
1342 | */ |
1343 | void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, |
1344 | kmp_critical_name *crit) { |
1345 | #if KMP_USE_DYNAMIC_LOCK |
1346 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1347 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1348 | #endif // OMPT_SUPPORT |
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1350 | #else |
1351 | KMP_COUNT_BLOCK(OMP_CRITICAL); |
1352 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1353 | ompt_state_t prev_state = ompt_state_undefined; |
1354 | ompt_thread_info_t ti; |
1355 | #endif |
1356 | kmp_user_lock_p lck; |
1357 | |
1358 | KC_TRACE(10, ("__kmpc_critical: called T#%d\n" , global_tid)); |
1359 | __kmp_assert_valid_gtid(global_tid); |
1360 | |
1361 | // TODO: add THR_OVHD_STATE |
1362 | |
1363 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); |
1364 | KMP_CHECK_USER_LOCK_INIT(); |
1365 | |
1366 | if ((__kmp_user_lock_kind == lk_tas) && |
1367 | (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1368 | lck = (kmp_user_lock_p)crit; |
1369 | } |
1370 | #if KMP_USE_FUTEX |
1371 | else if ((__kmp_user_lock_kind == lk_futex) && |
1372 | (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1373 | lck = (kmp_user_lock_p)crit; |
1374 | } |
1375 | #endif |
1376 | else { // ticket, queuing or drdpa |
1377 | lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); |
1378 | } |
1379 | |
1380 | if (__kmp_env_consistency_check) |
1381 | __kmp_push_sync(global_tid, ct_critical, loc, lck); |
1382 | |
1383 | // since the critical directive binds to all threads, not just the current |
1384 | // team we have to check this even if we are in a serialized team. |
1385 | // also, even if we are the uber thread, we still have to conduct the lock, |
1386 | // as we have to contend with sibling threads. |
1387 | |
1388 | #if USE_ITT_BUILD |
1389 | __kmp_itt_critical_acquiring(lck); |
1390 | #endif /* USE_ITT_BUILD */ |
1391 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1392 | OMPT_STORE_RETURN_ADDRESS(gtid); |
1393 | void *codeptr_ra = NULL; |
1394 | if (ompt_enabled.enabled) { |
1395 | ti = __kmp_threads[global_tid]->th.ompt_thread_info; |
1396 | /* OMPT state update */ |
1397 | prev_state = ti.state; |
1398 | ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; |
1399 | ti.state = ompt_state_wait_critical; |
1400 | |
1401 | /* OMPT event callback */ |
1402 | codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); |
1403 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1404 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1405 | ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
1406 | (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); |
1407 | } |
1408 | } |
1409 | #endif |
1410 | // Value of 'crit' should be good for using as a critical_id of the critical |
1411 | // section directive. |
1412 | __kmp_acquire_user_lock_with_checks(lck, global_tid); |
1413 | |
1414 | #if USE_ITT_BUILD |
1415 | __kmp_itt_critical_acquired(lck); |
1416 | #endif /* USE_ITT_BUILD */ |
1417 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1418 | if (ompt_enabled.enabled) { |
1419 | /* OMPT state update */ |
1420 | ti.state = prev_state; |
1421 | ti.wait_id = 0; |
1422 | |
1423 | /* OMPT event callback */ |
1424 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
1425 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
1426 | ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); |
1427 | } |
1428 | } |
1429 | #endif |
1430 | KMP_POP_PARTITIONED_TIMER(); |
1431 | |
1432 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical); |
1433 | KA_TRACE(15, ("__kmpc_critical: done T#%d\n" , global_tid)); |
1434 | #endif // KMP_USE_DYNAMIC_LOCK |
1435 | } |
1436 | |
1437 | #if KMP_USE_DYNAMIC_LOCK |
1438 | |
1439 | // Converts the given hint to an internal lock implementation |
1440 | static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { |
1441 | #if KMP_USE_TSX |
1442 | #define KMP_TSX_LOCK(seq) lockseq_##seq |
1443 | #else |
1444 | #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq |
1445 | #endif |
1446 | |
1447 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1448 | #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm) |
1449 | #else |
1450 | #define KMP_CPUINFO_RTM 0 |
1451 | #endif |
1452 | |
1453 | // Hints that do not require further logic |
1454 | if (hint & kmp_lock_hint_hle) |
1455 | return KMP_TSX_LOCK(hle); |
1456 | if (hint & kmp_lock_hint_rtm) |
1457 | return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; |
1458 | if (hint & kmp_lock_hint_adaptive) |
1459 | return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; |
1460 | |
1461 | // Rule out conflicting hints first by returning the default lock |
1462 | if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) |
1463 | return __kmp_user_lock_seq; |
1464 | if ((hint & omp_lock_hint_speculative) && |
1465 | (hint & omp_lock_hint_nonspeculative)) |
1466 | return __kmp_user_lock_seq; |
1467 | |
1468 | // Do not even consider speculation when it appears to be contended |
1469 | if (hint & omp_lock_hint_contended) |
1470 | return lockseq_queuing; |
1471 | |
1472 | // Uncontended lock without speculation |
1473 | if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) |
1474 | return lockseq_tas; |
1475 | |
1476 | // Use RTM lock for speculation |
1477 | if (hint & omp_lock_hint_speculative) |
1478 | return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; |
1479 | |
1480 | return __kmp_user_lock_seq; |
1481 | } |
1482 | |
1483 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1484 | #if KMP_USE_DYNAMIC_LOCK |
1485 | static kmp_mutex_impl_t |
1486 | __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { |
1487 | if (user_lock) { |
1488 | switch (KMP_EXTRACT_D_TAG(user_lock)) { |
1489 | case 0: |
1490 | break; |
1491 | #if KMP_USE_FUTEX |
1492 | case locktag_futex: |
1493 | return kmp_mutex_impl_queuing; |
1494 | #endif |
1495 | case locktag_tas: |
1496 | return kmp_mutex_impl_spin; |
1497 | #if KMP_USE_TSX |
1498 | case locktag_hle: |
1499 | case locktag_rtm_spin: |
1500 | return kmp_mutex_impl_speculative; |
1501 | #endif |
1502 | default: |
1503 | return kmp_mutex_impl_none; |
1504 | } |
1505 | ilock = KMP_LOOKUP_I_LOCK(user_lock); |
1506 | } |
1507 | KMP_ASSERT(ilock); |
1508 | switch (ilock->type) { |
1509 | #if KMP_USE_TSX |
1510 | case locktag_adaptive: |
1511 | case locktag_rtm_queuing: |
1512 | return kmp_mutex_impl_speculative; |
1513 | #endif |
1514 | case locktag_nested_tas: |
1515 | return kmp_mutex_impl_spin; |
1516 | #if KMP_USE_FUTEX |
1517 | case locktag_nested_futex: |
1518 | #endif |
1519 | case locktag_ticket: |
1520 | case locktag_queuing: |
1521 | case locktag_drdpa: |
1522 | case locktag_nested_ticket: |
1523 | case locktag_nested_queuing: |
1524 | case locktag_nested_drdpa: |
1525 | return kmp_mutex_impl_queuing; |
1526 | default: |
1527 | return kmp_mutex_impl_none; |
1528 | } |
1529 | } |
1530 | #else |
1531 | // For locks without dynamic binding |
1532 | static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { |
1533 | switch (__kmp_user_lock_kind) { |
1534 | case lk_tas: |
1535 | return kmp_mutex_impl_spin; |
1536 | #if KMP_USE_FUTEX |
1537 | case lk_futex: |
1538 | #endif |
1539 | case lk_ticket: |
1540 | case lk_queuing: |
1541 | case lk_drdpa: |
1542 | return kmp_mutex_impl_queuing; |
1543 | #if KMP_USE_TSX |
1544 | case lk_hle: |
1545 | case lk_rtm_queuing: |
1546 | case lk_rtm_spin: |
1547 | case lk_adaptive: |
1548 | return kmp_mutex_impl_speculative; |
1549 | #endif |
1550 | default: |
1551 | return kmp_mutex_impl_none; |
1552 | } |
1553 | } |
1554 | #endif // KMP_USE_DYNAMIC_LOCK |
1555 | #endif // OMPT_SUPPORT && OMPT_OPTIONAL |
1556 | |
1557 | /*! |
1558 | @ingroup WORK_SHARING |
1559 | @param loc source location information. |
1560 | @param global_tid global thread number. |
1561 | @param crit identity of the critical section. This could be a pointer to a lock |
1562 | associated with the critical section, or some other suitably unique value. |
1563 | @param hint the lock hint. |
1564 | |
1565 | Enter code protected by a `critical` construct with a hint. The hint value is |
1566 | used to suggest a lock implementation. This function blocks until the executing |
1567 | thread can enter the critical section unless the hint suggests use of |
1568 | speculative execution and the hardware supports it. |
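
A sketch of the calls a compiler might emit for a <tt>critical</tt> construct
carrying a <tt>hint</tt> clause; the lock variable and the hint value here are
illustrative only:
@code
static kmp_critical_name crit = {0};
kmp_int32 gtid = __kmpc_global_thread_num(loc);
__kmpc_critical_with_hint(loc, gtid, &crit, omp_lock_hint_contended);
// ... code protected by the critical construct ...
__kmpc_end_critical(loc, gtid, &crit);
@endcode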
1569 | */ |
1570 | void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, |
1571 | kmp_critical_name *crit, uint32_t hint) { |
1572 | KMP_COUNT_BLOCK(OMP_CRITICAL); |
1573 | kmp_user_lock_p lck; |
1574 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1575 | ompt_state_t prev_state = ompt_state_undefined; |
1576 | ompt_thread_info_t ti; |
1577 | // This is the case, if called from __kmpc_critical: |
1578 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); |
1579 | if (!codeptr) |
1580 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
1581 | #endif |
1582 | |
1583 | KC_TRACE(10, ("__kmpc_critical: called T#%d\n" , global_tid)); |
__kmp_assert_valid_gtid(global_tid);
1585 | |
1586 | kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; |
1587 | // Check if it is initialized. |
1588 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); |
1589 | kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); |
1590 | if (*lk == 0) { |
1591 | if (KMP_IS_D_LOCK(lockseq)) { |
1592 | (void)KMP_COMPARE_AND_STORE_ACQ32( |
1593 | (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, |
1594 | KMP_GET_D_TAG(lockseq)); |
1595 | } else { |
__kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1597 | } |
1598 | } |
1599 | // Branch for accessing the actual lock object and set operation. This |
1600 | // branching is inevitable since this lock initialization does not follow the |
1601 | // normal dispatch path (lock table is not used). |
1602 | if (KMP_EXTRACT_D_TAG(lk) != 0) { |
1603 | lck = (kmp_user_lock_p)lk; |
1604 | if (__kmp_env_consistency_check) { |
__kmp_push_sync(global_tid, ct_critical, loc, lck,
1606 | __kmp_map_hint_to_lock(hint)); |
1607 | } |
1608 | #if USE_ITT_BUILD |
__kmp_itt_critical_acquiring(lck);
1610 | #endif |
1611 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1612 | if (ompt_enabled.enabled) { |
1613 | ti = __kmp_threads[global_tid]->th.ompt_thread_info; |
1614 | /* OMPT state update */ |
1615 | prev_state = ti.state; |
1616 | ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; |
1617 | ti.state = ompt_state_wait_critical; |
1618 | |
1619 | /* OMPT event callback */ |
1620 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1621 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1622 | ompt_mutex_critical, (unsigned int)hint, |
__ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1624 | codeptr); |
1625 | } |
1626 | } |
1627 | #endif |
1628 | #if KMP_USE_INLINED_TAS |
1629 | if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { |
1630 | KMP_ACQUIRE_TAS_LOCK(lck, global_tid); |
1631 | } else |
1632 | #elif KMP_USE_INLINED_FUTEX |
1633 | if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { |
1634 | KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); |
1635 | } else |
1636 | #endif |
1637 | { |
1638 | KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); |
1639 | } |
1640 | } else { |
1641 | kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); |
1642 | lck = ilk->lock; |
1643 | if (__kmp_env_consistency_check) { |
__kmp_push_sync(global_tid, ct_critical, loc, lck,
1645 | __kmp_map_hint_to_lock(hint)); |
1646 | } |
1647 | #if USE_ITT_BUILD |
__kmp_itt_critical_acquiring(lck);
1649 | #endif |
1650 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1651 | if (ompt_enabled.enabled) { |
1652 | ti = __kmp_threads[global_tid]->th.ompt_thread_info; |
1653 | /* OMPT state update */ |
1654 | prev_state = ti.state; |
1655 | ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; |
1656 | ti.state = ompt_state_wait_critical; |
1657 | |
1658 | /* OMPT event callback */ |
1659 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
1660 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
1661 | ompt_mutex_critical, (unsigned int)hint, |
__ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1663 | codeptr); |
1664 | } |
1665 | } |
1666 | #endif |
1667 | KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); |
1668 | } |
1669 | KMP_POP_PARTITIONED_TIMER(); |
1670 | |
1671 | #if USE_ITT_BUILD |
__kmp_itt_critical_acquired(lck);
1673 | #endif /* USE_ITT_BUILD */ |
1674 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1675 | if (ompt_enabled.enabled) { |
1676 | /* OMPT state update */ |
1677 | ti.state = prev_state; |
1678 | ti.wait_id = 0; |
1679 | |
1680 | /* OMPT event callback */ |
1681 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
1682 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
1683 | ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
1684 | } |
1685 | } |
1686 | #endif |
1687 | |
1688 | KMP_PUSH_PARTITIONED_TIMER(OMP_critical); |
1689 | KA_TRACE(15, ("__kmpc_critical: done T#%d\n" , global_tid)); |
1690 | } // __kmpc_critical_with_hint |
1691 | |
1692 | #endif // KMP_USE_DYNAMIC_LOCK |
1693 | |
1694 | /*! |
1695 | @ingroup WORK_SHARING |
1696 | @param loc source location information. |
@param global_tid global thread number.
1698 | @param crit identity of the critical section. This could be a pointer to a lock |
1699 | associated with the critical section, or some other suitably unique value. |
1700 | |
1701 | Leave a critical section, releasing any lock that was held during its execution. |
1702 | */ |
1703 | void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, |
1704 | kmp_critical_name *crit) { |
1705 | kmp_user_lock_p lck; |
1706 | |
1707 | KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n" , global_tid)); |
1708 | |
1709 | #if KMP_USE_DYNAMIC_LOCK |
1710 | int locktag = KMP_EXTRACT_D_TAG(crit); |
1711 | if (locktag) { |
1712 | lck = (kmp_user_lock_p)crit; |
1713 | KMP_ASSERT(lck != NULL); |
1714 | if (__kmp_env_consistency_check) { |
__kmp_pop_sync(global_tid, ct_critical, loc);
1716 | } |
1717 | #if USE_ITT_BUILD |
__kmp_itt_critical_releasing(lck);
1719 | #endif |
1720 | #if KMP_USE_INLINED_TAS |
1721 | if (locktag == locktag_tas && !__kmp_env_consistency_check) { |
1722 | KMP_RELEASE_TAS_LOCK(lck, global_tid); |
1723 | } else |
1724 | #elif KMP_USE_INLINED_FUTEX |
1725 | if (locktag == locktag_futex && !__kmp_env_consistency_check) { |
1726 | KMP_RELEASE_FUTEX_LOCK(lck, global_tid); |
1727 | } else |
1728 | #endif |
1729 | { |
1730 | KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); |
1731 | } |
1732 | } else { |
1733 | kmp_indirect_lock_t *ilk = |
1734 | (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); |
1735 | KMP_ASSERT(ilk != NULL); |
1736 | lck = ilk->lock; |
1737 | if (__kmp_env_consistency_check) { |
__kmp_pop_sync(global_tid, ct_critical, loc);
1739 | } |
1740 | #if USE_ITT_BUILD |
__kmp_itt_critical_releasing(lck);
1742 | #endif |
1743 | KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); |
1744 | } |
1745 | |
1746 | #else // KMP_USE_DYNAMIC_LOCK |
1747 | |
1748 | if ((__kmp_user_lock_kind == lk_tas) && |
1749 | (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1750 | lck = (kmp_user_lock_p)crit; |
1751 | } |
1752 | #if KMP_USE_FUTEX |
1753 | else if ((__kmp_user_lock_kind == lk_futex) && |
1754 | (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { |
1755 | lck = (kmp_user_lock_p)crit; |
1756 | } |
1757 | #endif |
1758 | else { // ticket, queuing or drdpa |
1759 | lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); |
1760 | } |
1761 | |
1762 | KMP_ASSERT(lck != NULL); |
1763 | |
1764 | if (__kmp_env_consistency_check) |
1765 | __kmp_pop_sync(global_tid, ct_critical, loc); |
1766 | |
1767 | #if USE_ITT_BUILD |
1768 | __kmp_itt_critical_releasing(lck); |
1769 | #endif /* USE_ITT_BUILD */ |
// The value of 'crit' should be usable as the critical_id of the critical
// section directive.
1772 | __kmp_release_user_lock_with_checks(lck, global_tid); |
1773 | |
1774 | #endif // KMP_USE_DYNAMIC_LOCK |
1775 | |
1776 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1777 | /* OMPT release event triggers after lock is released; place here to trigger |
1778 | * for all #if branches */ |
1779 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1780 | if (ompt_enabled.ompt_callback_mutex_released) { |
1781 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
1782 | ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, |
1783 | OMPT_LOAD_RETURN_ADDRESS(0)); |
1784 | } |
1785 | #endif |
1786 | |
1787 | KMP_POP_PARTITIONED_TIMER(); |
1788 | KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n" , global_tid)); |
1789 | } |
1790 | |
1791 | /*! |
1792 | @ingroup SYNCHRONIZATION |
1793 | @param loc source location information |
1794 | @param global_tid thread id. |
1795 | @return one if the thread should execute the master block, zero otherwise |
1796 | |
1797 | Start execution of a combined barrier and master. The barrier is executed inside |
1798 | this function. |
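
For reference, a sketch of the pattern these entry points are intended for
(<tt>loc</tt> and <tt>gtid</tt> as usual):
@code
if (__kmpc_barrier_master(loc, gtid)) {
  // code executed only by the thread that runs the master block
  __kmpc_end_barrier_master(loc, gtid);
}
@endcode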
1799 | */ |
1800 | kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { |
1801 | int status; |
1802 | KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n" , global_tid)); |
__kmp_assert_valid_gtid(global_tid);
1804 | |
1805 | if (!TCR_4(__kmp_init_parallel)) |
1806 | __kmp_parallel_initialize(); |
1807 | |
1808 | __kmp_resume_if_soft_paused(); |
1809 | |
1810 | if (__kmp_env_consistency_check) |
__kmp_check_barrier(global_tid, ct_barrier, loc);
1812 | |
1813 | #if OMPT_SUPPORT |
1814 | ompt_frame_t *ompt_frame; |
1815 | if (ompt_enabled.enabled) { |
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1817 | if (ompt_frame->enter_frame.ptr == NULL) |
1818 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
1819 | } |
1820 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1821 | #endif |
1822 | #if USE_ITT_NOTIFY |
1823 | __kmp_threads[global_tid]->th.th_ident = loc; |
1824 | #endif |
status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1826 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1827 | if (ompt_enabled.enabled) { |
1828 | ompt_frame->enter_frame = ompt_data_none; |
1829 | } |
1830 | #endif |
1831 | |
1832 | return (status != 0) ? 0 : 1; |
1833 | } |
1834 | |
1835 | /*! |
1836 | @ingroup SYNCHRONIZATION |
1837 | @param loc source location information |
1838 | @param global_tid thread id. |
1839 | |
1840 | Complete the execution of a combined barrier and master. This function should |
1841 | only be called at the completion of the <tt>master</tt> code. Other threads will |
1842 | still be waiting at the barrier and this call releases them. |
1843 | */ |
1844 | void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { |
1845 | KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n" , global_tid)); |
__kmp_assert_valid_gtid(global_tid);
__kmp_end_split_barrier(bs_plain_barrier, global_tid);
1848 | } |
1849 | |
1850 | /*! |
1851 | @ingroup SYNCHRONIZATION |
1852 | @param loc source location information |
1853 | @param global_tid thread id. |
1854 | @return one if the thread should execute the master block, zero otherwise |
1855 | |
1856 | Start execution of a combined barrier and master(nowait) construct. |
1857 | The barrier is executed inside this function. |
There is no equivalent "end" function, since the construct has nowait
semantics: the other threads do not wait for the thread that executes the
master block, so there is nothing to release afterwards.
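
A sketch of the expected usage (note the absence of a matching "end" call):
@code
if (__kmpc_barrier_master_nowait(loc, gtid)) {
  // code executed only by the selected thread; the others do not wait for it
}
@endcode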
1859 | */ |
1860 | kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { |
1861 | kmp_int32 ret; |
1862 | KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n" , global_tid)); |
__kmp_assert_valid_gtid(global_tid);
1864 | |
1865 | if (!TCR_4(__kmp_init_parallel)) |
1866 | __kmp_parallel_initialize(); |
1867 | |
1868 | __kmp_resume_if_soft_paused(); |
1869 | |
1870 | if (__kmp_env_consistency_check) { |
1871 | if (loc == 0) { |
1872 | KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? |
1873 | } |
__kmp_check_barrier(global_tid, ct_barrier, loc);
1875 | } |
1876 | |
1877 | #if OMPT_SUPPORT |
1878 | ompt_frame_t *ompt_frame; |
1879 | if (ompt_enabled.enabled) { |
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1881 | if (ompt_frame->enter_frame.ptr == NULL) |
1882 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
1883 | } |
1884 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
1885 | #endif |
1886 | #if USE_ITT_NOTIFY |
1887 | __kmp_threads[global_tid]->th.th_ident = loc; |
1888 | #endif |
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1890 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1891 | if (ompt_enabled.enabled) { |
1892 | ompt_frame->enter_frame = ompt_data_none; |
1893 | } |
1894 | #endif |
1895 | |
1896 | ret = __kmpc_master(loc, global_tid); |
1897 | |
1898 | if (__kmp_env_consistency_check) { |
1899 | /* there's no __kmpc_end_master called; so the (stats) */ |
1900 | /* actions of __kmpc_end_master are done here */ |
1901 | if (ret) { |
1902 | /* only one thread should do the pop since only */ |
1903 | /* one did the push (see __kmpc_master()) */ |
__kmp_pop_sync(global_tid, ct_master, loc);
1905 | } |
1906 | } |
1907 | |
1908 | return (ret); |
1909 | } |
1910 | |
1911 | /* The BARRIER for a SINGLE process section is always explicit */ |
1912 | /*! |
1913 | @ingroup WORK_SHARING |
1914 | @param loc source location information |
1915 | @param global_tid global thread number |
1916 | @return One if this thread should execute the single construct, zero otherwise. |
1917 | |
1918 | Test whether to execute a <tt>single</tt> construct. |
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if one is required.
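
A sketch of the code a compiler might generate for a <tt>single</tt> construct
without a <tt>nowait</tt> clause (the barrier after the construct is explicit):
@code
if (__kmpc_single(loc, gtid)) {
  // block executed by exactly one thread of the team
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // no barrier is implied by the two calls above
@endcode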
1921 | */ |
1922 | |
1923 | kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { |
__kmp_assert_valid_gtid(global_tid);
kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1926 | |
1927 | if (rc) { |
1928 | // We are going to execute the single statement, so we should count it. |
1929 | KMP_COUNT_BLOCK(OMP_SINGLE); |
1930 | KMP_PUSH_PARTITIONED_TIMER(OMP_single); |
1931 | } |
1932 | |
1933 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1934 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
1935 | kmp_team_t *team = this_thr->th.th_team; |
int tid = __kmp_tid_from_gtid(global_tid);
1937 | |
1938 | if (ompt_enabled.enabled) { |
1939 | if (rc) { |
1940 | if (ompt_enabled.ompt_callback_work) { |
1941 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1942 | ompt_work_single_executor, ompt_scope_begin, |
1943 | &(team->t.ompt_team_info.parallel_data), |
1944 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
1945 | 1, OMPT_GET_RETURN_ADDRESS(0)); |
1946 | } |
1947 | } else { |
1948 | if (ompt_enabled.ompt_callback_work) { |
1949 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1950 | ompt_work_single_other, ompt_scope_begin, |
1951 | &(team->t.ompt_team_info.parallel_data), |
1952 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
1953 | 1, OMPT_GET_RETURN_ADDRESS(0)); |
1954 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1955 | ompt_work_single_other, ompt_scope_end, |
1956 | &(team->t.ompt_team_info.parallel_data), |
1957 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), |
1958 | 1, OMPT_GET_RETURN_ADDRESS(0)); |
1959 | } |
1960 | } |
1961 | } |
1962 | #endif |
1963 | |
1964 | return rc; |
1965 | } |
1966 | |
1967 | /*! |
1968 | @ingroup WORK_SHARING |
1969 | @param loc source location information |
1970 | @param global_tid global thread number |
1971 | |
1972 | Mark the end of a <tt>single</tt> construct. This function should |
1973 | only be called by the thread that executed the block of code protected |
1974 | by the `single` construct. |
1975 | */ |
1976 | void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { |
__kmp_assert_valid_gtid(global_tid);
__kmp_exit_single(global_tid);
1979 | KMP_POP_PARTITIONED_TIMER(); |
1980 | |
1981 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
1982 | kmp_info_t *this_thr = __kmp_threads[global_tid]; |
1983 | kmp_team_t *team = this_thr->th.th_team; |
int tid = __kmp_tid_from_gtid(global_tid);
1985 | |
1986 | if (ompt_enabled.ompt_callback_work) { |
1987 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
1988 | ompt_work_single_executor, ompt_scope_end, |
1989 | &(team->t.ompt_team_info.parallel_data), |
1990 | &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, |
1991 | OMPT_GET_RETURN_ADDRESS(0)); |
1992 | } |
1993 | #endif |
1994 | } |
1995 | |
1996 | /*! |
1997 | @ingroup WORK_SHARING |
1998 | @param loc Source location |
1999 | @param global_tid Global thread id |
2000 | |
2001 | Mark the end of a statically scheduled loop. |
2002 | */ |
2003 | void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { |
2004 | KMP_POP_PARTITIONED_TIMER(); |
2005 | KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n" , global_tid)); |
2006 | |
2007 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2008 | if (ompt_enabled.ompt_callback_work) { |
2009 | ompt_work_t ompt_work_type = ompt_work_loop_static; |
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2012 | // Determine workshare type |
2013 | if (loc != NULL) { |
2014 | if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { |
2015 | ompt_work_type = ompt_work_loop_static; |
2016 | } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { |
2017 | ompt_work_type = ompt_work_sections; |
2018 | } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { |
2019 | ompt_work_type = ompt_work_distribute; |
2020 | } else { |
2021 | // use default set above. |
2022 | // a warning about this case is provided in __kmpc_for_static_init |
2023 | } |
2024 | KMP_DEBUG_ASSERT(ompt_work_type); |
2025 | } |
2026 | ompt_callbacks.ompt_callback(ompt_callback_work)( |
2027 | ompt_work_type, ompt_scope_end, &(team_info->parallel_data), |
2028 | &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); |
2029 | } |
2030 | #endif |
2031 | if (__kmp_env_consistency_check) |
__kmp_pop_workshare(global_tid, ct_pdo, loc);
2033 | } |
2034 | |
// User routines which take C-style arguments (call by value), as opposed to
// the equivalent Fortran routines
2037 | |
2038 | void ompc_set_num_threads(int arg) { |
2039 | // !!!!! TODO: check the per-task binding |
__kmp_set_num_threads(arg, __kmp_entry_gtid());
2041 | } |
2042 | |
2043 | void ompc_set_dynamic(int flag) { |
2044 | kmp_info_t *thread; |
2045 | |
2046 | /* For the thread-private implementation of the internal controls */ |
2047 | thread = __kmp_entry_thread(); |
2048 | |
2049 | __kmp_save_internal_controls(thread); |
2050 | |
2051 | set__dynamic(thread, flag ? true : false); |
2052 | } |
2053 | |
2054 | void ompc_set_nested(int flag) { |
2055 | kmp_info_t *thread; |
2056 | |
2057 | /* For the thread-private internal controls implementation */ |
2058 | thread = __kmp_entry_thread(); |
2059 | |
2060 | __kmp_save_internal_controls(thread); |
2061 | |
2062 | set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); |
2063 | } |
2064 | |
2065 | void ompc_set_max_active_levels(int max_active_levels) { |
2066 | /* TO DO */ |
2067 | /* we want per-task implementation of this internal control */ |
2068 | |
2069 | /* For the per-thread internal controls implementation */ |
__kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2071 | } |
2072 | |
2073 | void ompc_set_schedule(omp_sched_t kind, int modifier) { |
2074 | // !!!!! TODO: check the per-task binding |
__kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2076 | } |
2077 | |
2078 | int ompc_get_ancestor_thread_num(int level) { |
2079 | return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); |
2080 | } |
2081 | |
2082 | int ompc_get_team_size(int level) { |
2083 | return __kmp_get_team_size(__kmp_entry_gtid(), level); |
2084 | } |
2085 | |
2086 | /* OpenMP 5.0 Affinity Format API */ |
2087 | void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { |
2088 | if (!__kmp_init_serial) { |
2089 | __kmp_serial_initialize(); |
2090 | } |
__kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
format, KMP_STRLEN(format) + 1);
2093 | } |
2094 | |
2095 | size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { |
2096 | size_t format_size; |
2097 | if (!__kmp_init_serial) { |
2098 | __kmp_serial_initialize(); |
2099 | } |
format_size = KMP_STRLEN(__kmp_affinity_format);
2101 | if (buffer && size) { |
__kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, format_size + 1);
2104 | } |
2105 | return format_size; |
2106 | } |
2107 | |
2108 | void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { |
2109 | int gtid; |
2110 | if (!TCR_4(__kmp_init_middle)) { |
2111 | __kmp_middle_initialize(); |
2112 | } |
2113 | __kmp_assign_root_init_mask(); |
2114 | gtid = __kmp_get_gtid(); |
2115 | #if KMP_AFFINITY_SUPPORTED |
2116 | if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && |
2117 | __kmp_affinity.flags.reset) { |
2118 | __kmp_reset_root_init_mask(gtid); |
2119 | } |
2120 | #endif |
2121 | __kmp_aux_display_affinity(gtid, format); |
2122 | } |
2123 | |
2124 | size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, |
2125 | char const *format) { |
2126 | int gtid; |
2127 | size_t num_required; |
2128 | kmp_str_buf_t capture_buf; |
2129 | if (!TCR_4(__kmp_init_middle)) { |
2130 | __kmp_middle_initialize(); |
2131 | } |
2132 | __kmp_assign_root_init_mask(); |
2133 | gtid = __kmp_get_gtid(); |
2134 | #if KMP_AFFINITY_SUPPORTED |
2135 | if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && |
2136 | __kmp_affinity.flags.reset) { |
2137 | __kmp_reset_root_init_mask(gtid); |
2138 | } |
2139 | #endif |
2140 | __kmp_str_buf_init(&capture_buf); |
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2142 | if (buffer && buf_size) { |
__kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
capture_buf.used + 1);
2145 | } |
__kmp_str_buf_free(&capture_buf);
2147 | return num_required; |
2148 | } |
2149 | |
2150 | void kmpc_set_stacksize(int arg) { |
2151 | // __kmp_aux_set_stacksize initializes the library if needed |
2152 | __kmp_aux_set_stacksize(arg); |
2153 | } |
2154 | |
2155 | void kmpc_set_stacksize_s(size_t arg) { |
2156 | // __kmp_aux_set_stacksize initializes the library if needed |
2157 | __kmp_aux_set_stacksize(arg); |
2158 | } |
2159 | |
2160 | void kmpc_set_blocktime(int arg) { |
2161 | int gtid, tid, bt = arg; |
2162 | kmp_info_t *thread; |
2163 | |
2164 | gtid = __kmp_entry_gtid(); |
2165 | tid = __kmp_tid_from_gtid(gtid); |
2166 | thread = __kmp_thread_from_gtid(gtid); |
2167 | |
__kmp_aux_convert_blocktime(&bt);
__kmp_aux_set_blocktime(bt, thread, tid);
2170 | } |
2171 | |
2172 | void kmpc_set_library(int arg) { |
2173 | // __kmp_user_set_library initializes the library if needed |
__kmp_user_set_library((enum library_type)arg);
2175 | } |
2176 | |
2177 | void kmpc_set_defaults(char const *str) { |
2178 | // __kmp_aux_set_defaults initializes the library if needed |
__kmp_aux_set_defaults(str, KMP_STRLEN(str));
2180 | } |
2181 | |
2182 | void kmpc_set_disp_num_buffers(int arg) { |
2183 | // ignore after initialization because some teams have already |
2184 | // allocated dispatch buffers |
2185 | if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && |
2186 | arg <= KMP_MAX_DISP_NUM_BUFF) { |
2187 | __kmp_dispatch_num_buffers = arg; |
2188 | } |
2189 | } |
2190 | |
2191 | int kmpc_set_affinity_mask_proc(int proc, void **mask) { |
2192 | #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED |
2193 | return -1; |
2194 | #else |
2195 | if (!TCR_4(__kmp_init_middle)) { |
2196 | __kmp_middle_initialize(); |
2197 | } |
2198 | __kmp_assign_root_init_mask(); |
2199 | return __kmp_aux_set_affinity_mask_proc(proc, mask); |
2200 | #endif |
2201 | } |
2202 | |
2203 | int kmpc_unset_affinity_mask_proc(int proc, void **mask) { |
2204 | #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED |
2205 | return -1; |
2206 | #else |
2207 | if (!TCR_4(__kmp_init_middle)) { |
2208 | __kmp_middle_initialize(); |
2209 | } |
2210 | __kmp_assign_root_init_mask(); |
2211 | return __kmp_aux_unset_affinity_mask_proc(proc, mask); |
2212 | #endif |
2213 | } |
2214 | |
2215 | int kmpc_get_affinity_mask_proc(int proc, void **mask) { |
2216 | #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED |
2217 | return -1; |
2218 | #else |
2219 | if (!TCR_4(__kmp_init_middle)) { |
2220 | __kmp_middle_initialize(); |
2221 | } |
2222 | __kmp_assign_root_init_mask(); |
2223 | return __kmp_aux_get_affinity_mask_proc(proc, mask); |
2224 | #endif |
2225 | } |
2226 | |
2227 | /* -------------------------------------------------------------------------- */ |
2228 | /*! |
2229 | @ingroup THREADPRIVATE |
2230 | @param loc source location information |
2231 | @param gtid global thread number |
2232 | @param cpy_size size of the cpy_data buffer |
2233 | @param cpy_data pointer to data to be copied |
2234 | @param cpy_func helper function to call for copying data |
2235 | @param didit flag variable: 1=single thread; 0=not single thread |
2236 | |
2237 | __kmpc_copyprivate implements the interface for the private data broadcast |
2238 | needed for the copyprivate clause associated with a single region in an |
2239 | OpenMP<sup>*</sup> program (both C and Fortran). |
2240 | All threads participating in the parallel region call this routine. |
2241 | One of the threads (called the single thread) should have the <tt>didit</tt> |
2242 | variable set to 1 and all other threads should have that variable set to 0. |
2243 | All threads pass a pointer to a data buffer (cpy_data) that they have built. |
2244 | |
2245 | The OpenMP specification forbids the use of nowait on the single region when a |
2246 | copyprivate clause is present. However, @ref __kmpc_copyprivate implements a |
2247 | barrier internally to avoid race conditions, so the code generation for the |
2248 | single region should avoid generating a barrier after the call to @ref |
2249 | __kmpc_copyprivate. |
2250 | |
2251 | The <tt>gtid</tt> parameter is the global thread id for the current thread. |
2252 | The <tt>loc</tt> parameter is a pointer to source location information. |
2253 | |
2254 | Internal implementation: The single thread will first copy its descriptor |
2255 | address (cpy_data) to a team-private location, then the other threads will each |
2256 | call the function pointed to by the parameter cpy_func, which carries out the |
2257 | copy by copying the data using the cpy_data buffer. |
2258 | |
2259 | The cpy_func routine used for the copy and the contents of the data area defined |
2260 | by cpy_data and cpy_size may be built in any fashion that will allow the copy |
2261 | to be done. For instance, the cpy_data buffer can hold the actual data to be |
2262 | copied or it may hold a list of pointers to the data. The cpy_func routine must |
2263 | interpret the cpy_data buffer appropriately. |
2264 | |
2265 | The interface to cpy_func is as follows: |
2266 | @code |
2267 | void cpy_func( void *destination, void *source ) |
2268 | @endcode |
2269 | where void *destination is the cpy_data pointer for the thread being copied to |
2270 | and void *source is the cpy_data pointer for the thread being copied from. |
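
For example, a minimal (illustrative) cpy_func for the case where cpy_data
points directly at a single int to be broadcast:
@code
void copy_int(void *destination, void *source) {
  // Copy the value written by the single thread into this thread's buffer.
  *(int *)destination = *(int *)source;
}
@endcode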
2271 | */ |
2272 | void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, |
2273 | void *cpy_data, void (*cpy_func)(void *, void *), |
2274 | kmp_int32 didit) { |
2275 | void **data_ptr; |
2276 | KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n" , gtid)); |
2277 | __kmp_assert_valid_gtid(gtid); |
2278 | |
2279 | KMP_MB(); |
2280 | |
2281 | data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; |
2282 | |
2283 | if (__kmp_env_consistency_check) { |
2284 | if (loc == 0) { |
2285 | KMP_WARNING(ConstructIdentInvalid); |
2286 | } |
2287 | } |
2288 | |
2289 | // ToDo: Optimize the following two barriers into some kind of split barrier |
2290 | |
2291 | if (didit) |
2292 | *data_ptr = cpy_data; |
2293 | |
2294 | #if OMPT_SUPPORT |
2295 | ompt_frame_t *ompt_frame; |
2296 | if (ompt_enabled.enabled) { |
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2298 | if (ompt_frame->enter_frame.ptr == NULL) |
2299 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
2300 | } |
2301 | OMPT_STORE_RETURN_ADDRESS(gtid); |
2302 | #endif |
2303 | /* This barrier is not a barrier region boundary */ |
2304 | #if USE_ITT_NOTIFY |
2305 | __kmp_threads[gtid]->th.th_ident = loc; |
2306 | #endif |
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2308 | |
2309 | if (!didit) |
2310 | (*cpy_func)(cpy_data, *data_ptr); |
2311 | |
2312 | // Consider next barrier a user-visible barrier for barrier region boundaries |
2313 | // Nesting checks are already handled by the single construct checks |
2314 | { |
2315 | #if OMPT_SUPPORT |
2316 | OMPT_STORE_RETURN_ADDRESS(gtid); |
2317 | #endif |
2318 | #if USE_ITT_NOTIFY |
2319 | __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. |
2320 | // tasks can overwrite the location) |
2321 | #endif |
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2323 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2324 | if (ompt_enabled.enabled) { |
2325 | ompt_frame->enter_frame = ompt_data_none; |
2326 | } |
2327 | #endif |
2328 | } |
2329 | } |
2330 | |
2331 | /* --------------------------------------------------------------------------*/ |
2332 | /*! |
2333 | @ingroup THREADPRIVATE |
2334 | @param loc source location information |
2335 | @param gtid global thread number |
2336 | @param cpy_data pointer to the data to be saved/copied or 0 |
2337 | @return the saved pointer to the data |
2338 | |
__kmpc_copyprivate_light is a lighter-weight version of __kmpc_copyprivate: it
only saves the pointer it is given (if it is non-zero, i.e. it comes from the
thread that executed the single region) and returns that pointer from all
calls (the single thread itself does not need it). This version does not do
any actual data copying; the copy has to be done elsewhere, e.g. inline in the
generated code. For the same reason there is no barrier at the end of this
function, unlike __kmpc_copyprivate, so the generated code needs a barrier
after all of the data has been copied.
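
A sketch of the intended code-generation pattern; copy_my_privates is a
hypothetical helper standing in for the inline copy the compiler emits:
@code
void *src = __kmpc_copyprivate_light(loc, gtid, didit ? my_data : NULL);
if (!didit)
  copy_my_privates(my_data, src); // copy from the single thread's buffer
__kmpc_barrier(loc, gtid);        // barrier after all copies have completed
@endcode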
2347 | */ |
2348 | void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { |
2349 | void **data_ptr; |
2350 | |
2351 | KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n" , gtid)); |
2352 | |
2353 | KMP_MB(); |
2354 | |
2355 | data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; |
2356 | |
2357 | if (__kmp_env_consistency_check) { |
2358 | if (loc == 0) { |
2359 | KMP_WARNING(ConstructIdentInvalid); |
2360 | } |
2361 | } |
2362 | |
2363 | // ToDo: Optimize the following barrier |
2364 | |
2365 | if (cpy_data) |
2366 | *data_ptr = cpy_data; |
2367 | |
2368 | #if OMPT_SUPPORT |
2369 | ompt_frame_t *ompt_frame; |
2370 | if (ompt_enabled.enabled) { |
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2372 | if (ompt_frame->enter_frame.ptr == NULL) |
2373 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
2374 | OMPT_STORE_RETURN_ADDRESS(gtid); |
2375 | } |
2376 | #endif |
2377 | /* This barrier is not a barrier region boundary */ |
2378 | #if USE_ITT_NOTIFY |
2379 | __kmp_threads[gtid]->th.th_ident = loc; |
2380 | #endif |
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2382 | |
2383 | return *data_ptr; |
2384 | } |
2385 | |
2386 | /* -------------------------------------------------------------------------- */ |
2387 | |
2388 | #define INIT_LOCK __kmp_init_user_lock_with_checks |
2389 | #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks |
2390 | #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks |
2391 | #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed |
2392 | #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks |
2393 | #define ACQUIRE_NESTED_LOCK_TIMED \ |
2394 | __kmp_acquire_nested_user_lock_with_checks_timed |
2395 | #define RELEASE_LOCK __kmp_release_user_lock_with_checks |
2396 | #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks |
2397 | #define TEST_LOCK __kmp_test_user_lock_with_checks |
2398 | #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks |
2399 | #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks |
2400 | #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks |
2401 | |
2402 | // TODO: Make check abort messages use location info & pass it into |
2403 | // with_checks routines |
2404 | |
2405 | #if KMP_USE_DYNAMIC_LOCK |
2406 | |
2407 | // internal lock initializer |
2408 | static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, |
2409 | kmp_dyna_lockseq_t seq) { |
2410 | if (KMP_IS_D_LOCK(seq)) { |
2411 | KMP_INIT_D_LOCK(lock, seq); |
2412 | #if USE_ITT_BUILD |
__kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2414 | #endif |
2415 | } else { |
2416 | KMP_INIT_I_LOCK(lock, seq); |
2417 | #if USE_ITT_BUILD |
2418 | kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
__kmp_itt_lock_creating(ilk->lock, loc);
2420 | #endif |
2421 | } |
2422 | } |
2423 | |
2424 | // internal nest lock initializer |
2425 | static __forceinline void |
2426 | __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, |
2427 | kmp_dyna_lockseq_t seq) { |
2428 | #if KMP_USE_TSX |
2429 | // Don't have nested lock implementation for speculative locks |
2430 | if (seq == lockseq_hle || seq == lockseq_rtm_queuing || |
2431 | seq == lockseq_rtm_spin || seq == lockseq_adaptive) |
2432 | seq = __kmp_user_lock_seq; |
2433 | #endif |
2434 | switch (seq) { |
2435 | case lockseq_tas: |
2436 | seq = lockseq_nested_tas; |
2437 | break; |
2438 | #if KMP_USE_FUTEX |
2439 | case lockseq_futex: |
2440 | seq = lockseq_nested_futex; |
2441 | break; |
2442 | #endif |
2443 | case lockseq_ticket: |
2444 | seq = lockseq_nested_ticket; |
2445 | break; |
2446 | case lockseq_queuing: |
2447 | seq = lockseq_nested_queuing; |
2448 | break; |
2449 | case lockseq_drdpa: |
2450 | seq = lockseq_nested_drdpa; |
2451 | break; |
2452 | default: |
2453 | seq = lockseq_nested_queuing; |
2454 | } |
2455 | KMP_INIT_I_LOCK(lock, seq); |
2456 | #if USE_ITT_BUILD |
2457 | kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); |
__kmp_itt_lock_creating(ilk->lock, loc);
2459 | #endif |
2460 | } |
2461 | |
2462 | /* initialize the lock with a hint */ |
2463 | void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, |
2464 | uintptr_t hint) { |
2465 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2466 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2467 | KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint" ); |
2468 | } |
2469 | |
__kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2471 | |
2472 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2473 | // This is the case, if called from omp_init_lock_with_hint: |
2474 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2475 | if (!codeptr) |
2476 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2477 | if (ompt_enabled.ompt_callback_lock_init) { |
2478 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2479 | ompt_mutex_lock, (omp_lock_hint_t)hint, |
2480 | __ompt_get_mutex_impl_type(user_lock), |
2481 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2482 | } |
2483 | #endif |
2484 | } |
2485 | |
/* initialize the nest lock with a hint */
2487 | void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, |
2488 | void **user_lock, uintptr_t hint) { |
2489 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2490 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2491 | KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint" ); |
2492 | } |
2493 | |
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2495 | |
2496 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2497 | // This is the case, if called from omp_init_lock_with_hint: |
2498 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2499 | if (!codeptr) |
2500 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2501 | if (ompt_enabled.ompt_callback_lock_init) { |
2502 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2503 | ompt_mutex_nest_lock, (omp_lock_hint_t)hint, |
2504 | __ompt_get_mutex_impl_type(user_lock), |
2505 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2506 | } |
2507 | #endif |
2508 | } |
2509 | |
2510 | #endif // KMP_USE_DYNAMIC_LOCK |
2511 | |
2512 | /* initialize the lock */ |
2513 | void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2514 | #if KMP_USE_DYNAMIC_LOCK |
2515 | |
2516 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2517 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2518 | KMP_FATAL(LockIsUninitialized, "omp_init_lock" ); |
2519 | } |
__kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2521 | |
2522 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2523 | // This is the case, if called from omp_init_lock_with_hint: |
2524 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2525 | if (!codeptr) |
2526 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2527 | if (ompt_enabled.ompt_callback_lock_init) { |
2528 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2529 | ompt_mutex_lock, omp_lock_hint_none, |
2530 | __ompt_get_mutex_impl_type(user_lock), |
2531 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2532 | } |
2533 | #endif |
2534 | |
2535 | #else // KMP_USE_DYNAMIC_LOCK |
2536 | |
2537 | static char const *const func = "omp_init_lock" ; |
2538 | kmp_user_lock_p lck; |
2539 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2540 | |
2541 | if (__kmp_env_consistency_check) { |
2542 | if (user_lock == NULL) { |
2543 | KMP_FATAL(LockIsUninitialized, func); |
2544 | } |
2545 | } |
2546 | |
2547 | KMP_CHECK_USER_LOCK_INIT(); |
2548 | |
2549 | if ((__kmp_user_lock_kind == lk_tas) && |
2550 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2551 | lck = (kmp_user_lock_p)user_lock; |
2552 | } |
2553 | #if KMP_USE_FUTEX |
2554 | else if ((__kmp_user_lock_kind == lk_futex) && |
2555 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2556 | lck = (kmp_user_lock_p)user_lock; |
2557 | } |
2558 | #endif |
2559 | else { |
2560 | lck = __kmp_user_lock_allocate(user_lock, gtid, 0); |
2561 | } |
2562 | INIT_LOCK(lck); |
2563 | __kmp_set_user_lock_location(lck, loc); |
2564 | |
2565 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2566 | // This is the case, if called from omp_init_lock_with_hint: |
2567 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2568 | if (!codeptr) |
2569 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2570 | if (ompt_enabled.ompt_callback_lock_init) { |
2571 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2572 | ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
2573 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2574 | } |
2575 | #endif |
2576 | |
2577 | #if USE_ITT_BUILD |
2578 | __kmp_itt_lock_creating(lck); |
2579 | #endif /* USE_ITT_BUILD */ |
2580 | |
2581 | #endif // KMP_USE_DYNAMIC_LOCK |
2582 | } // __kmpc_init_lock |
2583 | |
/* initialize the nest lock */
2585 | void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2586 | #if KMP_USE_DYNAMIC_LOCK |
2587 | |
2588 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2589 | if (__kmp_env_consistency_check && user_lock == NULL) { |
2590 | KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock" ); |
2591 | } |
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2593 | |
2594 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2595 | // This is the case, if called from omp_init_lock_with_hint: |
2596 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2597 | if (!codeptr) |
2598 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2599 | if (ompt_enabled.ompt_callback_lock_init) { |
2600 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2601 | ompt_mutex_nest_lock, omp_lock_hint_none, |
2602 | __ompt_get_mutex_impl_type(user_lock), |
2603 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2604 | } |
2605 | #endif |
2606 | |
2607 | #else // KMP_USE_DYNAMIC_LOCK |
2608 | |
2609 | static char const *const func = "omp_init_nest_lock" ; |
2610 | kmp_user_lock_p lck; |
2611 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
2612 | |
2613 | if (__kmp_env_consistency_check) { |
2614 | if (user_lock == NULL) { |
2615 | KMP_FATAL(LockIsUninitialized, func); |
2616 | } |
2617 | } |
2618 | |
2619 | KMP_CHECK_USER_LOCK_INIT(); |
2620 | |
2621 | if ((__kmp_user_lock_kind == lk_tas) && |
2622 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2623 | OMP_NEST_LOCK_T_SIZE)) { |
2624 | lck = (kmp_user_lock_p)user_lock; |
2625 | } |
2626 | #if KMP_USE_FUTEX |
2627 | else if ((__kmp_user_lock_kind == lk_futex) && |
2628 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2629 | OMP_NEST_LOCK_T_SIZE)) { |
2630 | lck = (kmp_user_lock_p)user_lock; |
2631 | } |
2632 | #endif |
2633 | else { |
2634 | lck = __kmp_user_lock_allocate(user_lock, gtid, 0); |
2635 | } |
2636 | |
2637 | INIT_NESTED_LOCK(lck); |
2638 | __kmp_set_user_lock_location(lck, loc); |
2639 | |
2640 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2641 | // This is the case, if called from omp_init_lock_with_hint: |
2642 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2643 | if (!codeptr) |
2644 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2645 | if (ompt_enabled.ompt_callback_lock_init) { |
2646 | ompt_callbacks.ompt_callback(ompt_callback_lock_init)( |
2647 | ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
2648 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2649 | } |
2650 | #endif |
2651 | |
2652 | #if USE_ITT_BUILD |
2653 | __kmp_itt_lock_creating(lck); |
2654 | #endif /* USE_ITT_BUILD */ |
2655 | |
2656 | #endif // KMP_USE_DYNAMIC_LOCK |
2657 | } // __kmpc_init_nest_lock |
2658 | |
2659 | void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2660 | #if KMP_USE_DYNAMIC_LOCK |
2661 | |
2662 | #if USE_ITT_BUILD |
2663 | kmp_user_lock_p lck; |
2664 | if (KMP_EXTRACT_D_TAG(user_lock) == 0) { |
2665 | lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; |
2666 | } else { |
2667 | lck = (kmp_user_lock_p)user_lock; |
2668 | } |
__kmp_itt_lock_destroyed(lck);
2670 | #endif |
2671 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2672 | // This is the case, if called from omp_init_lock_with_hint: |
2673 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2674 | if (!codeptr) |
2675 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2676 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2677 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2678 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2679 | } |
2680 | #endif |
2681 | KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); |
2682 | #else |
2683 | kmp_user_lock_p lck; |
2684 | |
2685 | if ((__kmp_user_lock_kind == lk_tas) && |
2686 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2687 | lck = (kmp_user_lock_p)user_lock; |
2688 | } |
2689 | #if KMP_USE_FUTEX |
2690 | else if ((__kmp_user_lock_kind == lk_futex) && |
2691 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2692 | lck = (kmp_user_lock_p)user_lock; |
2693 | } |
2694 | #endif |
2695 | else { |
2696 | lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock" ); |
2697 | } |
2698 | |
2699 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2700 | // This is the case, if called from omp_init_lock_with_hint: |
2701 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2702 | if (!codeptr) |
2703 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2704 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2705 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2706 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2707 | } |
2708 | #endif |
2709 | |
2710 | #if USE_ITT_BUILD |
2711 | __kmp_itt_lock_destroyed(lck); |
2712 | #endif /* USE_ITT_BUILD */ |
2713 | DESTROY_LOCK(lck); |
2714 | |
2715 | if ((__kmp_user_lock_kind == lk_tas) && |
2716 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2717 | ; |
2718 | } |
2719 | #if KMP_USE_FUTEX |
2720 | else if ((__kmp_user_lock_kind == lk_futex) && |
2721 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2722 | ; |
2723 | } |
2724 | #endif |
2725 | else { |
2726 | __kmp_user_lock_free(user_lock, gtid, lck); |
2727 | } |
2728 | #endif // KMP_USE_DYNAMIC_LOCK |
2729 | } // __kmpc_destroy_lock |
2730 | |
/* destroy the nest lock */
2732 | void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2733 | #if KMP_USE_DYNAMIC_LOCK |
2734 | |
2735 | #if USE_ITT_BUILD |
2736 | kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); |
__kmp_itt_lock_destroyed(ilk->lock);
2738 | #endif |
2739 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2740 | // This is the case, if called from omp_init_lock_with_hint: |
2741 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2742 | if (!codeptr) |
2743 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2744 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2745 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2746 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2747 | } |
2748 | #endif |
2749 | KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); |
2750 | |
2751 | #else // KMP_USE_DYNAMIC_LOCK |
2752 | |
2753 | kmp_user_lock_p lck; |
2754 | |
2755 | if ((__kmp_user_lock_kind == lk_tas) && |
2756 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2757 | OMP_NEST_LOCK_T_SIZE)) { |
2758 | lck = (kmp_user_lock_p)user_lock; |
2759 | } |
2760 | #if KMP_USE_FUTEX |
2761 | else if ((__kmp_user_lock_kind == lk_futex) && |
2762 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2763 | OMP_NEST_LOCK_T_SIZE)) { |
2764 | lck = (kmp_user_lock_p)user_lock; |
2765 | } |
2766 | #endif |
2767 | else { |
2768 | lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock" ); |
2769 | } |
2770 | |
2771 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2772 | // This is the case, if called from omp_init_lock_with_hint: |
2773 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2774 | if (!codeptr) |
2775 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2776 | if (ompt_enabled.ompt_callback_lock_destroy) { |
2777 | ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( |
2778 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2779 | } |
2780 | #endif |
2781 | |
2782 | #if USE_ITT_BUILD |
2783 | __kmp_itt_lock_destroyed(lck); |
2784 | #endif /* USE_ITT_BUILD */ |
2785 | |
2786 | DESTROY_NESTED_LOCK(lck); |
2787 | |
2788 | if ((__kmp_user_lock_kind == lk_tas) && |
2789 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2790 | OMP_NEST_LOCK_T_SIZE)) { |
2791 | ; |
2792 | } |
2793 | #if KMP_USE_FUTEX |
2794 | else if ((__kmp_user_lock_kind == lk_futex) && |
2795 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2796 | OMP_NEST_LOCK_T_SIZE)) { |
2797 | ; |
2798 | } |
2799 | #endif |
2800 | else { |
2801 | __kmp_user_lock_free(user_lock, gtid, lck); |
2802 | } |
2803 | #endif // KMP_USE_DYNAMIC_LOCK |
2804 | } // __kmpc_destroy_nest_lock |
2805 | |
2806 | void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2807 | KMP_COUNT_BLOCK(OMP_set_lock); |
2808 | #if KMP_USE_DYNAMIC_LOCK |
2809 | int tag = KMP_EXTRACT_D_TAG(user_lock); |
2810 | #if USE_ITT_BUILD |
// The itt function will get to the right lock object.
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2814 | #endif |
2815 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2816 | // This is the case, if called from omp_init_lock_with_hint: |
2817 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2818 | if (!codeptr) |
2819 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2820 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2821 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2822 | ompt_mutex_lock, omp_lock_hint_none, |
2823 | __ompt_get_mutex_impl_type(user_lock), |
2824 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2825 | } |
2826 | #endif |
2827 | #if KMP_USE_INLINED_TAS |
2828 | if (tag == locktag_tas && !__kmp_env_consistency_check) { |
2829 | KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); |
2830 | } else |
2831 | #elif KMP_USE_INLINED_FUTEX |
2832 | if (tag == locktag_futex && !__kmp_env_consistency_check) { |
2833 | KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); |
2834 | } else |
2835 | #endif |
2836 | { |
2837 | __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); |
2838 | } |
2839 | #if USE_ITT_BUILD |
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2841 | #endif |
2842 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2843 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2844 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2845 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2846 | } |
2847 | #endif |
2848 | |
2849 | #else // KMP_USE_DYNAMIC_LOCK |
2850 | |
2851 | kmp_user_lock_p lck; |
2852 | |
2853 | if ((__kmp_user_lock_kind == lk_tas) && |
2854 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2855 | lck = (kmp_user_lock_p)user_lock; |
2856 | } |
2857 | #if KMP_USE_FUTEX |
2858 | else if ((__kmp_user_lock_kind == lk_futex) && |
2859 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
2860 | lck = (kmp_user_lock_p)user_lock; |
2861 | } |
2862 | #endif |
2863 | else { |
2864 | lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock" ); |
2865 | } |
2866 | |
2867 | #if USE_ITT_BUILD |
2868 | __kmp_itt_lock_acquiring(lck); |
2869 | #endif /* USE_ITT_BUILD */ |
2870 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2871 | // This is the case, if called from omp_init_lock_with_hint: |
2872 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2873 | if (!codeptr) |
2874 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2875 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2876 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2877 | ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
2878 | (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
2879 | } |
2880 | #endif |
2881 | |
2882 | ACQUIRE_LOCK(lck, gtid); |
2883 | |
2884 | #if USE_ITT_BUILD |
2885 | __kmp_itt_lock_acquired(lck); |
2886 | #endif /* USE_ITT_BUILD */ |
2887 | |
2888 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2889 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2890 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2891 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
2892 | } |
2893 | #endif |
2894 | |
2895 | #endif // KMP_USE_DYNAMIC_LOCK |
2896 | } |
2897 | |
2898 | void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
2899 | #if KMP_USE_DYNAMIC_LOCK |
2900 | |
2901 | #if USE_ITT_BUILD |
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2903 | #endif |
2904 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2905 | // This is the case, if called from omp_init_lock_with_hint: |
2906 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2907 | if (!codeptr) |
2908 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2909 | if (ompt_enabled.enabled) { |
2910 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2911 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2912 | ompt_mutex_nest_lock, omp_lock_hint_none, |
2913 | __ompt_get_mutex_impl_type(user_lock), |
2914 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2915 | } |
2916 | } |
2917 | #endif |
2918 | int acquire_status = |
2919 | KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); |
2920 | (void)acquire_status; |
2921 | #if USE_ITT_BUILD |
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2923 | #endif |
2924 | |
2925 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2926 | if (ompt_enabled.enabled) { |
2927 | if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { |
2928 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2929 | // lock_first |
2930 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2931 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, |
2932 | codeptr); |
2933 | } |
2934 | } else { |
2935 | if (ompt_enabled.ompt_callback_nest_lock) { |
2936 | // lock_next |
2937 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
2938 | ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
2939 | } |
2940 | } |
2941 | } |
2942 | #endif |
2943 | |
2944 | #else // KMP_USE_DYNAMIC_LOCK |
2945 | int acquire_status; |
2946 | kmp_user_lock_p lck; |
2947 | |
2948 | if ((__kmp_user_lock_kind == lk_tas) && |
2949 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
2950 | OMP_NEST_LOCK_T_SIZE)) { |
2951 | lck = (kmp_user_lock_p)user_lock; |
2952 | } |
2953 | #if KMP_USE_FUTEX |
2954 | else if ((__kmp_user_lock_kind == lk_futex) && |
2955 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
2956 | OMP_NEST_LOCK_T_SIZE)) { |
2957 | lck = (kmp_user_lock_p)user_lock; |
2958 | } |
2959 | #endif |
2960 | else { |
2961 | lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock" ); |
2962 | } |
2963 | |
2964 | #if USE_ITT_BUILD |
2965 | __kmp_itt_lock_acquiring(lck); |
2966 | #endif /* USE_ITT_BUILD */ |
2967 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2968 | // This is the case, if called from omp_init_lock_with_hint: |
2969 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
2970 | if (!codeptr) |
2971 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
2972 | if (ompt_enabled.enabled) { |
2973 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
2974 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
2975 | ompt_mutex_nest_lock, omp_lock_hint_none, |
2976 | __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, |
2977 | codeptr); |
2978 | } |
2979 | } |
2980 | #endif |
2981 | |
2982 | ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); |
2983 | |
2984 | #if USE_ITT_BUILD |
2985 | __kmp_itt_lock_acquired(lck); |
2986 | #endif /* USE_ITT_BUILD */ |
2987 | |
2988 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
2989 | if (ompt_enabled.enabled) { |
2990 | if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { |
2991 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
2992 | // lock_first |
2993 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
2994 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
2995 | } |
2996 | } else { |
2997 | if (ompt_enabled.ompt_callback_nest_lock) { |
2998 | // lock_next |
2999 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
3000 | ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3001 | } |
3002 | } |
3003 | } |
3004 | #endif |
3005 | |
3006 | #endif // KMP_USE_DYNAMIC_LOCK |
3007 | } |
3008 | |
3009 | void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
3010 | #if KMP_USE_DYNAMIC_LOCK |
3011 | |
3012 | int tag = KMP_EXTRACT_D_TAG(user_lock); |
3013 | #if USE_ITT_BUILD |
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3015 | #endif |
3016 | #if KMP_USE_INLINED_TAS |
3017 | if (tag == locktag_tas && !__kmp_env_consistency_check) { |
3018 | KMP_RELEASE_TAS_LOCK(user_lock, gtid); |
3019 | } else |
3020 | #elif KMP_USE_INLINED_FUTEX |
3021 | if (tag == locktag_futex && !__kmp_env_consistency_check) { |
3022 | KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); |
3023 | } else |
3024 | #endif |
3025 | { |
3026 | __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); |
3027 | } |
3028 | |
3029 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3030 | // This is the case, if called from omp_init_lock_with_hint: |
3031 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3032 | if (!codeptr) |
3033 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3034 | if (ompt_enabled.ompt_callback_mutex_released) { |
3035 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3036 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3037 | } |
3038 | #endif |
3039 | |
3040 | #else // KMP_USE_DYNAMIC_LOCK |
3041 | |
3042 | kmp_user_lock_p lck; |
3043 | |
3044 | /* Can't use serial interval since not block structured */ |
3045 | /* release the lock */ |
3046 | |
3047 | if ((__kmp_user_lock_kind == lk_tas) && |
3048 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
3049 | #if KMP_OS_LINUX && \ |
3050 | (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) |
3051 | // "fast" path implemented to fix customer performance issue |
3052 | #if USE_ITT_BUILD |
3053 | __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); |
3054 | #endif /* USE_ITT_BUILD */ |
3055 | TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); |
3056 | KMP_MB(); |
3057 | |
3058 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3059 | // This is the case, if called from omp_init_lock_with_hint: |
3060 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3061 | if (!codeptr) |
3062 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3063 | if (ompt_enabled.ompt_callback_mutex_released) { |
3064 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3065 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3066 | } |
3067 | #endif |
3068 | |
3069 | return; |
3070 | #else |
3071 | lck = (kmp_user_lock_p)user_lock; |
3072 | #endif |
3073 | } |
3074 | #if KMP_USE_FUTEX |
3075 | else if ((__kmp_user_lock_kind == lk_futex) && |
3076 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
3077 | lck = (kmp_user_lock_p)user_lock; |
3078 | } |
3079 | #endif |
3080 | else { |
3081 | lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock" ); |
3082 | } |
3083 | |
3084 | #if USE_ITT_BUILD |
3085 | __kmp_itt_lock_releasing(lck); |
3086 | #endif /* USE_ITT_BUILD */ |
3087 | |
3088 | RELEASE_LOCK(lck, gtid); |
3089 | |
3090 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3091 | // This is the case, if called from omp_init_lock_with_hint: |
3092 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3093 | if (!codeptr) |
3094 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3095 | if (ompt_enabled.ompt_callback_mutex_released) { |
3096 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3097 | ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3098 | } |
3099 | #endif |
3100 | |
3101 | #endif // KMP_USE_DYNAMIC_LOCK |
3102 | } |
3103 | |
3104 | /* release the lock */ |
3105 | void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
3106 | #if KMP_USE_DYNAMIC_LOCK |
3107 | |
3108 | #if USE_ITT_BUILD |
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3110 | #endif |
3111 | int release_status = |
3112 | KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); |
3113 | (void)release_status; |
3114 | |
3115 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3116 | // This is the case, if called from omp_init_lock_with_hint: |
3117 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3118 | if (!codeptr) |
3119 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3120 | if (ompt_enabled.enabled) { |
3121 | if (release_status == KMP_LOCK_RELEASED) { |
3122 | if (ompt_enabled.ompt_callback_mutex_released) { |
3123 | // release_lock_last |
3124 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3125 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, |
3126 | codeptr); |
3127 | } |
3128 | } else if (ompt_enabled.ompt_callback_nest_lock) { |
3129 | // release_lock_prev |
3130 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
3131 | ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3132 | } |
3133 | } |
3134 | #endif |
3135 | |
3136 | #else // KMP_USE_DYNAMIC_LOCK |
3137 | |
3138 | kmp_user_lock_p lck; |
3139 | |
3140 | /* Can't use serial interval since not block structured */ |
3141 | |
3142 | if ((__kmp_user_lock_kind == lk_tas) && |
3143 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
3144 | OMP_NEST_LOCK_T_SIZE)) { |
3145 | #if KMP_OS_LINUX && \ |
3146 | (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) |
3147 | // "fast" path implemented to fix customer performance issue |
3148 | kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; |
3149 | #if USE_ITT_BUILD |
3150 | __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); |
3151 | #endif /* USE_ITT_BUILD */ |
3152 | |
3153 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3154 | int release_status = KMP_LOCK_STILL_HELD; |
3155 | #endif |
3156 | |
3157 | if (--(tl->lk.depth_locked) == 0) { |
3158 | TCW_4(tl->lk.poll, 0); |
3159 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3160 | release_status = KMP_LOCK_RELEASED; |
3161 | #endif |
3162 | } |
3163 | KMP_MB(); |
3164 | |
3165 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3166 | // This is the case, if called from omp_init_lock_with_hint: |
3167 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3168 | if (!codeptr) |
3169 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3170 | if (ompt_enabled.enabled) { |
3171 | if (release_status == KMP_LOCK_RELEASED) { |
3172 | if (ompt_enabled.ompt_callback_mutex_released) { |
3173 | // release_lock_last |
3174 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3175 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3176 | } |
3177 | } else if (ompt_enabled.ompt_callback_nest_lock) { |
3178 | // release_lock_previous |
3179 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3181 | } |
3182 | } |
3183 | #endif |
3184 | |
3185 | return; |
3186 | #else |
3187 | lck = (kmp_user_lock_p)user_lock; |
3188 | #endif |
3189 | } |
3190 | #if KMP_USE_FUTEX |
3191 | else if ((__kmp_user_lock_kind == lk_futex) && |
3192 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
3193 | OMP_NEST_LOCK_T_SIZE)) { |
3194 | lck = (kmp_user_lock_p)user_lock; |
3195 | } |
3196 | #endif |
3197 | else { |
3198 | lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock" ); |
3199 | } |
3200 | |
3201 | #if USE_ITT_BUILD |
3202 | __kmp_itt_lock_releasing(lck); |
3203 | #endif /* USE_ITT_BUILD */ |
3204 | |
3205 | int release_status; |
3206 | release_status = RELEASE_NESTED_LOCK(lck, gtid); |
3207 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3208 | // This is the case, if called from omp_init_lock_with_hint: |
3209 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3210 | if (!codeptr) |
3211 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3212 | if (ompt_enabled.enabled) { |
3213 | if (release_status == KMP_LOCK_RELEASED) { |
3214 | if (ompt_enabled.ompt_callback_mutex_released) { |
3215 | // release_lock_last |
3216 | ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( |
3217 | ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3218 | } |
3219 | } else if (ompt_enabled.ompt_callback_nest_lock) { |
3220 | // release_lock_previous |
3221 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3223 | } |
3224 | } |
3225 | #endif |
3226 | |
3227 | #endif // KMP_USE_DYNAMIC_LOCK |
3228 | } |
3229 | |
3230 | /* try to acquire the lock */ |
3231 | int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
3232 | KMP_COUNT_BLOCK(OMP_test_lock); |
3233 | |
3234 | #if KMP_USE_DYNAMIC_LOCK |
3235 | int rc; |
3236 | int tag = KMP_EXTRACT_D_TAG(user_lock); |
3237 | #if USE_ITT_BUILD |
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3239 | #endif |
3240 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3241 | // This is the case, if called from omp_init_lock_with_hint: |
3242 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3243 | if (!codeptr) |
3244 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3245 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
3246 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3247 | ompt_mutex_test_lock, omp_lock_hint_none, |
3248 | __ompt_get_mutex_impl_type(user_lock), |
3249 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3250 | } |
3251 | #endif |
3252 | #if KMP_USE_INLINED_TAS |
3253 | if (tag == locktag_tas && !__kmp_env_consistency_check) { |
3254 | KMP_TEST_TAS_LOCK(user_lock, gtid, rc); |
3255 | } else |
3256 | #elif KMP_USE_INLINED_FUTEX |
3257 | if (tag == locktag_futex && !__kmp_env_consistency_check) { |
3258 | KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); |
3259 | } else |
3260 | #endif |
3261 | { |
3262 | rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); |
3263 | } |
3264 | if (rc) { |
3265 | #if USE_ITT_BUILD |
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267 | #endif |
3268 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3269 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
3270 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3271 | ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3272 | } |
3273 | #endif |
3274 | return FTN_TRUE; |
3275 | } else { |
3276 | #if USE_ITT_BUILD |
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3278 | #endif |
3279 | return FTN_FALSE; |
3280 | } |
3281 | |
3282 | #else // KMP_USE_DYNAMIC_LOCK |
3283 | |
3284 | kmp_user_lock_p lck; |
3285 | int rc; |
3286 | |
3287 | if ((__kmp_user_lock_kind == lk_tas) && |
3288 | (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { |
3289 | lck = (kmp_user_lock_p)user_lock; |
3290 | } |
3291 | #if KMP_USE_FUTEX |
3292 | else if ((__kmp_user_lock_kind == lk_futex) && |
3293 | (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { |
3294 | lck = (kmp_user_lock_p)user_lock; |
3295 | } |
3296 | #endif |
3297 | else { |
3298 | lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock" ); |
3299 | } |
3300 | |
3301 | #if USE_ITT_BUILD |
3302 | __kmp_itt_lock_acquiring(lck); |
3303 | #endif /* USE_ITT_BUILD */ |
3304 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3305 | // This is the case, if called from omp_init_lock_with_hint: |
3306 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3307 | if (!codeptr) |
3308 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3309 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
3310 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3311 | ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), |
3312 | (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3313 | } |
3314 | #endif |
3315 | |
3316 | rc = TEST_LOCK(lck, gtid); |
3317 | #if USE_ITT_BUILD |
3318 | if (rc) { |
3319 | __kmp_itt_lock_acquired(lck); |
3320 | } else { |
3321 | __kmp_itt_lock_cancelled(lck); |
3322 | } |
3323 | #endif /* USE_ITT_BUILD */ |
3324 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3325 | if (rc && ompt_enabled.ompt_callback_mutex_acquired) { |
3326 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3327 | ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3328 | } |
3329 | #endif |
3330 | |
3331 | return (rc ? FTN_TRUE : FTN_FALSE); |
3332 | |
3333 | /* Can't use serial interval since not block structured */ |
3334 | |
3335 | #endif // KMP_USE_DYNAMIC_LOCK |
3336 | } |
3337 | |
3338 | /* try to acquire the lock */ |
3339 | int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { |
3340 | #if KMP_USE_DYNAMIC_LOCK |
3341 | int rc; |
3342 | #if USE_ITT_BUILD |
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3344 | #endif |
3345 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3346 | // This is the case, if called from omp_init_lock_with_hint: |
3347 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3348 | if (!codeptr) |
3349 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
3350 | if (ompt_enabled.ompt_callback_mutex_acquire) { |
3351 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3352 | ompt_mutex_test_nest_lock, omp_lock_hint_none, |
3353 | __ompt_get_mutex_impl_type(user_lock), |
3354 | (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3355 | } |
3356 | #endif |
3357 | rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); |
3358 | #if USE_ITT_BUILD |
3359 | if (rc) { |
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3361 | } else { |
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3363 | } |
3364 | #endif |
3365 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3366 | if (ompt_enabled.enabled && rc) { |
3367 | if (rc == 1) { |
3368 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
3369 | // lock_first |
3370 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3371 | ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, |
3372 | codeptr); |
3373 | } |
3374 | } else { |
3375 | if (ompt_enabled.ompt_callback_nest_lock) { |
3376 | // lock_next |
3377 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
3378 | ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); |
3379 | } |
3380 | } |
3381 | } |
3382 | #endif |
3383 | return rc; |
3384 | |
3385 | #else // KMP_USE_DYNAMIC_LOCK |
3386 | |
3387 | kmp_user_lock_p lck; |
3388 | int rc; |
3389 | |
3390 | if ((__kmp_user_lock_kind == lk_tas) && |
3391 | (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= |
3392 | OMP_NEST_LOCK_T_SIZE)) { |
3393 | lck = (kmp_user_lock_p)user_lock; |
3394 | } |
3395 | #if KMP_USE_FUTEX |
3396 | else if ((__kmp_user_lock_kind == lk_futex) && |
3397 | (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= |
3398 | OMP_NEST_LOCK_T_SIZE)) { |
3399 | lck = (kmp_user_lock_p)user_lock; |
3400 | } |
3401 | #endif |
3402 | else { |
3403 | lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock" ); |
3404 | } |
3405 | |
3406 | #if USE_ITT_BUILD |
3407 | __kmp_itt_lock_acquiring(lck); |
3408 | #endif /* USE_ITT_BUILD */ |
3409 | |
3410 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3411 | // This is the case, if called from omp_init_lock_with_hint: |
3412 | void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
3413 | if (!codeptr) |
3414 | codeptr = OMPT_GET_RETURN_ADDRESS(0); |
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_mutex_acquire) {
3417 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( |
3418 | ompt_mutex_test_nest_lock, omp_lock_hint_none, |
3419 | __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, |
3420 | codeptr); |
3421 | } |
3422 | #endif |
3423 | |
3424 | rc = TEST_NESTED_LOCK(lck, gtid); |
3425 | #if USE_ITT_BUILD |
3426 | if (rc) { |
3427 | __kmp_itt_lock_acquired(lck); |
3428 | } else { |
3429 | __kmp_itt_lock_cancelled(lck); |
3430 | } |
3431 | #endif /* USE_ITT_BUILD */ |
3432 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3433 | if (ompt_enabled.enabled && rc) { |
3434 | if (rc == 1) { |
3435 | if (ompt_enabled.ompt_callback_mutex_acquired) { |
3436 | // lock_first |
3437 | ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( |
3438 | ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); |
3439 | } |
3440 | } else { |
3441 | if (ompt_enabled.ompt_callback_nest_lock) { |
3442 | // lock_next |
3443 | ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( |
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3445 | } |
3446 | } |
3447 | } |
3448 | #endif |
3449 | return rc; |
3450 | |
3451 | /* Can't use serial interval since not block structured */ |
3452 | |
3453 | #endif // KMP_USE_DYNAMIC_LOCK |
3454 | } |
3455 | |
3456 | // Interface to fast scalable reduce methods routines |
3457 | |
3458 | // keep the selected method in a thread local structure for cross-function |
3459 | // usage: will be used in __kmpc_end_reduce* functions; |
3460 | // another solution: to re-determine the method one more time in |
3461 | // __kmpc_end_reduce* functions (new prototype required then) |
3462 | // AT: which solution is better? |
3463 | #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ |
3464 | ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) |
3465 | |
3466 | #define __KMP_GET_REDUCTION_METHOD(gtid) \ |
3467 | (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) |
3468 | |
3469 | // description of the packed_reduction_method variable: look at the macros in |
3470 | // kmp.h |
3471 | |
3472 | // used in a critical section reduce block |
3473 | static __forceinline void |
3474 | __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, |
3475 | kmp_critical_name *crit) { |
3476 | |
3477 | // this lock was visible to a customer and to the threading profile tool as a |
3478 | // serial overhead span (although it's used for an internal purpose only) |
3479 | // why was it visible in previous implementation? |
3480 | // should we keep it visible in new reduce block? |
3481 | kmp_user_lock_p lck; |
3482 | |
3483 | #if KMP_USE_DYNAMIC_LOCK |
3484 | |
3485 | kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; |
3486 | // Check if it is initialized. |
3487 | if (*lk == 0) { |
3488 | if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { |
3489 | (void)KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, |
3490 | KMP_GET_D_TAG(__kmp_user_lock_seq)); |
3491 | } else { |
      __kmp_init_indirect_csptr(crit, loc, global_tid,
3493 | KMP_GET_I_TAG(__kmp_user_lock_seq)); |
3494 | } |
3495 | } |
3496 | // Branch for accessing the actual lock object and set operation. This |
3497 | // branching is inevitable since this lock initialization does not follow the |
3498 | // normal dispatch path (lock table is not used). |
3499 | if (KMP_EXTRACT_D_TAG(lk) != 0) { |
3500 | lck = (kmp_user_lock_p)lk; |
3501 | KMP_DEBUG_ASSERT(lck != NULL); |
3502 | if (__kmp_env_consistency_check) { |
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3504 | } |
3505 | KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); |
3506 | } else { |
3507 | kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); |
3508 | lck = ilk->lock; |
3509 | KMP_DEBUG_ASSERT(lck != NULL); |
3510 | if (__kmp_env_consistency_check) { |
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3512 | } |
3513 | KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); |
3514 | } |
3515 | |
3516 | #else // KMP_USE_DYNAMIC_LOCK |
3517 | |
3518 | // We know that the fast reduction code is only emitted by Intel compilers |
3519 | // with 32 byte critical sections. If there isn't enough space, then we |
3520 | // have to use a pointer. |
3521 | if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { |
3522 | lck = (kmp_user_lock_p)crit; |
3523 | } else { |
3524 | lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); |
3525 | } |
3526 | KMP_DEBUG_ASSERT(lck != NULL); |
3527 | |
3528 | if (__kmp_env_consistency_check) |
3529 | __kmp_push_sync(global_tid, ct_critical, loc, lck); |
3530 | |
3531 | __kmp_acquire_user_lock_with_checks(lck, global_tid); |
3532 | |
3533 | #endif // KMP_USE_DYNAMIC_LOCK |
3534 | } |
3535 | |
3536 | // used in a critical section reduce block |
3537 | static __forceinline void |
3538 | __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, |
3539 | kmp_critical_name *crit) { |
3540 | |
3541 | kmp_user_lock_p lck; |
3542 | |
3543 | #if KMP_USE_DYNAMIC_LOCK |
3544 | |
3545 | if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { |
3546 | lck = (kmp_user_lock_p)crit; |
3547 | if (__kmp_env_consistency_check) |
      __kmp_pop_sync(global_tid, ct_critical, loc);
3549 | KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); |
3550 | } else { |
3551 | kmp_indirect_lock_t *ilk = |
3552 | (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); |
3553 | if (__kmp_env_consistency_check) |
      __kmp_pop_sync(global_tid, ct_critical, loc);
3555 | KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); |
3556 | } |
3557 | |
3558 | #else // KMP_USE_DYNAMIC_LOCK |
3559 | |
3560 | // We know that the fast reduction code is only emitted by Intel compilers |
3561 | // with 32 byte critical sections. If there isn't enough space, then we have |
3562 | // to use a pointer. |
3563 | if (__kmp_base_user_lock_size > 32) { |
3564 | lck = *((kmp_user_lock_p *)crit); |
3565 | KMP_ASSERT(lck != NULL); |
3566 | } else { |
3567 | lck = (kmp_user_lock_p)crit; |
3568 | } |
3569 | |
3570 | if (__kmp_env_consistency_check) |
3571 | __kmp_pop_sync(global_tid, ct_critical, loc); |
3572 | |
3573 | __kmp_release_user_lock_with_checks(lck, global_tid); |
3574 | |
3575 | #endif // KMP_USE_DYNAMIC_LOCK |
3576 | } // __kmp_end_critical_section_reduce_block |
3577 | |
3578 | static __forceinline int |
3579 | __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, |
3580 | int *task_state) { |
3581 | kmp_team_t *team; |
3582 | |
3583 | // Check if we are inside the teams construct? |
3584 | if (th->th.th_teams_microtask) { |
3585 | *team_p = team = th->th.th_team; |
3586 | if (team->t.t_level == th->th.th_teams_level) { |
3587 | // This is reduction at teams construct. |
3588 | KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 |
3589 | // Let's swap teams temporarily for the reduction. |
3590 | th->th.th_info.ds.ds_tid = team->t.t_master_tid; |
3591 | th->th.th_team = team->t.t_parent; |
3592 | th->th.th_team_nproc = th->th.th_team->t.t_nproc; |
3593 | th->th.th_task_team = th->th.th_team->t.t_task_team[0]; |
3594 | *task_state = th->th.th_task_state; |
3595 | th->th.th_task_state = 0; |
3596 | |
3597 | return 1; |
3598 | } |
3599 | } |
3600 | return 0; |
3601 | } |
3602 | |
3603 | static __forceinline void |
3604 | __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { |
3605 | // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. |
3606 | th->th.th_info.ds.ds_tid = 0; |
3607 | th->th.th_team = team; |
3608 | th->th.th_team_nproc = team->t.t_nproc; |
3609 | th->th.th_task_team = team->t.t_task_team[task_state]; |
  __kmp_type_convert(task_state, &(th->th.th_task_state));
3611 | } |
3612 | |
3613 | /* 2.a.i. Reduce Block without a terminating barrier */ |
3614 | /*! |
3615 | @ingroup SYNCHRONIZATION |
3616 | @param loc source location information |
3617 | @param global_tid global thread number |
3618 | @param num_vars number of items (variables) to be reduced |
3619 | @param reduce_size size of data in bytes to be reduced |
3620 | @param reduce_data pointer to data to be reduced |
3621 | @param reduce_func callback function providing reduction operation on two |
3622 | operands and returning result of reduction in lhs_data |
3623 | @param lck pointer to the unique lock data structure |
3624 | @result 1 for the primary thread, 0 for all other team threads, 2 for all team |
3625 | threads if atomic reduction needed |
3626 | |
3627 | The nowait version is used for a reduce clause with the nowait argument. |
3628 | */ |
3629 | kmp_int32 |
3630 | __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, |
3631 | size_t reduce_size, void *reduce_data, |
3632 | void (*reduce_func)(void *lhs_data, void *rhs_data), |
3633 | kmp_critical_name *lck) { |
3634 | |
3635 | KMP_COUNT_BLOCK(REDUCE_nowait); |
3636 | int retval = 0; |
3637 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3638 | kmp_info_t *th; |
3639 | kmp_team_t *team; |
3640 | int teams_swapped = 0, task_state; |
3641 | KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
3643 | |
3644 | // why do we need this initialization here at all? |
3645 | // Reduction clause can not be used as a stand-alone directive. |
3646 | |
3647 | // do not call __kmp_serial_initialize(), it will be called by |
3648 | // __kmp_parallel_initialize() if needed |
3649 | // possible detection of false-positive race by the threadchecker ??? |
3650 | if (!TCR_4(__kmp_init_parallel)) |
3651 | __kmp_parallel_initialize(); |
3652 | |
3653 | __kmp_resume_if_soft_paused(); |
3654 | |
3655 | // check correctness of reduce block nesting |
3656 | #if KMP_USE_DYNAMIC_LOCK |
3657 | if (__kmp_env_consistency_check) |
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3659 | #else |
3660 | if (__kmp_env_consistency_check) |
3661 | __kmp_push_sync(global_tid, ct_reduce, loc, NULL); |
3662 | #endif |
3663 | |
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3666 | |
3667 | // packed_reduction_method value will be reused by __kmp_end_reduce* function, |
3668 | // the value should be kept in a variable |
3669 | // the variable should be either a construct-specific or thread-specific |
3670 | // property, not a team specific property |
3671 | // (a thread can reach the next reduce block on the next construct, reduce |
3672 | // method may differ on the next construct) |
3673 | // an ident_t "loc" parameter could be used as a construct-specific property |
3674 | // (what if loc == 0?) |
3675 | // (if both construct-specific and team-specific variables were shared, |
  // then unnecessary extra syncs would be needed)
3677 | // a thread-specific variable is better regarding two issues above (next |
3678 | // construct and extra syncs) |
3679 | // a thread-specific "th_local.reduction_method" variable is used currently |
3680 | // each thread executes 'determine' and 'set' lines (no need to execute by one |
  // thread, to avoid unnecessary extra syncs)
3682 | |
3683 | packed_reduction_method = __kmp_determine_reduction_method( |
3684 | loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); |
3685 | __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); |
3686 | |
3687 | OMPT_REDUCTION_DECL(th, global_tid); |
3688 | if (packed_reduction_method == critical_reduce_block) { |
3689 | |
3690 | OMPT_REDUCTION_BEGIN; |
3691 | |
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3693 | retval = 1; |
3694 | |
3695 | } else if (packed_reduction_method == empty_reduce_block) { |
3696 | |
3697 | OMPT_REDUCTION_BEGIN; |
3698 | |
3699 | // usage: if team size == 1, no synchronization is required ( Intel |
3700 | // platforms only ) |
3701 | retval = 1; |
3702 | |
3703 | } else if (packed_reduction_method == atomic_reduce_block) { |
3704 | |
3705 | retval = 2; |
3706 | |
3707 | // all threads should do this pop here (because __kmpc_end_reduce_nowait() |
3708 | // won't be called by the code gen) |
3709 | // (it's not quite good, because the checking block has been closed by |
3710 | // this 'pop', |
3711 | // but atomic operation has not been executed yet, will be executed |
3712 | // slightly later, literally on next instruction) |
3713 | if (__kmp_env_consistency_check) |
      __kmp_pop_sync(global_tid, ct_reduce, loc);
3715 | |
3716 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
3717 | tree_reduce_block)) { |
3718 | |
3719 | // AT: performance issue: a real barrier here |
3720 | // AT: (if primary thread is slow, other threads are blocked here waiting for |
3721 | // the primary thread to come and release them) |
3722 | // AT: (it's not what a customer might expect specifying NOWAIT clause) |
3723 | // AT: (specifying NOWAIT won't result in improvement of performance, it'll |
3724 | // be confusing to a customer) |
3725 | // AT: another implementation of *barrier_gather*nowait() (or some other design) |
3726 | // might go faster and be more in line with sense of NOWAIT |
3727 | // AT: TO DO: do epcc test and compare times |
3728 | |
3729 | // this barrier should be invisible to a customer and to the threading profile |
3730 | // tool (it's neither a terminating barrier nor customer's code, it's |
3731 | // used for an internal purpose) |
3732 | #if OMPT_SUPPORT |
    // JP: can this barrier potentially lead to task scheduling?
3734 | // JP: as long as there is a barrier in the implementation, OMPT should and |
3735 | // will provide the barrier events |
3736 | // so we set-up the necessary frame/return addresses. |
3737 | ompt_frame_t *ompt_frame; |
3738 | if (ompt_enabled.enabled) { |
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3740 | if (ompt_frame->enter_frame.ptr == NULL) |
3741 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
3742 | } |
3743 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
3744 | #endif |
3745 | #if USE_ITT_NOTIFY |
3746 | __kmp_threads[global_tid]->th.th_ident = loc; |
3747 | #endif |
3748 | retval = |
3749 | __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), |
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3751 | retval = (retval != 0) ? (0) : (1); |
3752 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3753 | if (ompt_enabled.enabled) { |
3754 | ompt_frame->enter_frame = ompt_data_none; |
3755 | } |
3756 | #endif |
3757 | |
3758 | // all other workers except primary thread should do this pop here |
3759 | // ( none of other workers will get to __kmpc_end_reduce_nowait() ) |
3760 | if (__kmp_env_consistency_check) { |
3761 | if (retval == 0) { |
        __kmp_pop_sync(global_tid, ct_reduce, loc);
3763 | } |
3764 | } |
3765 | |
3766 | } else { |
3767 | |
3768 | // should never reach this block |
3769 | KMP_ASSERT(0); // "unexpected method" |
3770 | } |
3771 | if (teams_swapped) { |
3772 | __kmp_restore_swapped_teams(th, team, task_state); |
3773 | } |
3774 | KA_TRACE( |
3775 | 10, |
3776 | ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n" , |
3777 | global_tid, packed_reduction_method, retval)); |
3778 | |
3779 | return retval; |
3780 | } |
3781 | |
3782 | /*! |
3783 | @ingroup SYNCHRONIZATION |
3784 | @param loc source location information |
3785 | @param global_tid global thread id. |
3786 | @param lck pointer to the unique lock data structure |
3787 | |
3788 | Finish the execution of a reduce nowait. |
3789 | */ |
3790 | void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, |
3791 | kmp_critical_name *lck) { |
3792 | |
3793 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3794 | |
3795 | KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
3797 | |
3798 | packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); |
3799 | |
3800 | OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); |
3801 | |
3802 | if (packed_reduction_method == critical_reduce_block) { |
3803 | |
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3805 | OMPT_REDUCTION_END; |
3806 | |
3807 | } else if (packed_reduction_method == empty_reduce_block) { |
3808 | |
3809 | // usage: if team size == 1, no synchronization is required ( on Intel |
3810 | // platforms only ) |
3811 | |
3812 | OMPT_REDUCTION_END; |
3813 | |
3814 | } else if (packed_reduction_method == atomic_reduce_block) { |
3815 | |
3816 | // neither primary thread nor other workers should get here |
3817 | // (code gen does not generate this call in case 2: atomic reduce block) |
    // actually it would be better to remove this else-if branch altogether;
    // after removal this value would be checked by the 'else' and would assert
3820 | |
3821 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
3822 | tree_reduce_block)) { |
3823 | |
3824 | // only primary thread gets here |
3825 | // OMPT: tree reduction is annotated in the barrier code |
3826 | |
3827 | } else { |
3828 | |
3829 | // should never reach this block |
3830 | KMP_ASSERT(0); // "unexpected method" |
3831 | } |
3832 | |
3833 | if (__kmp_env_consistency_check) |
    __kmp_pop_sync(global_tid, ct_reduce, loc);
3835 | |
3836 | KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n" , |
3837 | global_tid, packed_reduction_method)); |
3838 | |
3839 | return; |
3840 | } |
3841 | |
3842 | /* 2.a.ii. Reduce Block with a terminating barrier */ |
3843 | |
3844 | /*! |
3845 | @ingroup SYNCHRONIZATION |
3846 | @param loc source location information |
3847 | @param global_tid global thread number |
3848 | @param num_vars number of items (variables) to be reduced |
3849 | @param reduce_size size of data in bytes to be reduced |
3850 | @param reduce_data pointer to data to be reduced |
3851 | @param reduce_func callback function providing reduction operation on two |
3852 | operands and returning result of reduction in lhs_data |
3853 | @param lck pointer to the unique lock data structure |
3854 | @result 1 for the primary thread, 0 for all other team threads, 2 for all team |
3855 | threads if atomic reduction needed |
3856 | |
3857 | A blocking reduce that includes an implicit barrier. |
3858 | */ |
3859 | kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, |
3860 | size_t reduce_size, void *reduce_data, |
3861 | void (*reduce_func)(void *lhs_data, void *rhs_data), |
3862 | kmp_critical_name *lck) { |
3863 | KMP_COUNT_BLOCK(REDUCE_wait); |
3864 | int retval = 0; |
3865 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3866 | kmp_info_t *th; |
3867 | kmp_team_t *team; |
3868 | int teams_swapped = 0, task_state; |
3869 | |
3870 | KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
3872 | |
3873 | // why do we need this initialization here at all? |
3874 | // Reduction clause can not be a stand-alone directive. |
3875 | |
3876 | // do not call __kmp_serial_initialize(), it will be called by |
3877 | // __kmp_parallel_initialize() if needed |
3878 | // possible detection of false-positive race by the threadchecker ??? |
3879 | if (!TCR_4(__kmp_init_parallel)) |
3880 | __kmp_parallel_initialize(); |
3881 | |
3882 | __kmp_resume_if_soft_paused(); |
3883 | |
3884 | // check correctness of reduce block nesting |
3885 | #if KMP_USE_DYNAMIC_LOCK |
3886 | if (__kmp_env_consistency_check) |
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3888 | #else |
3889 | if (__kmp_env_consistency_check) |
3890 | __kmp_push_sync(global_tid, ct_reduce, loc, NULL); |
3891 | #endif |
3892 | |
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3895 | |
3896 | packed_reduction_method = __kmp_determine_reduction_method( |
3897 | loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); |
3898 | __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); |
3899 | |
3900 | OMPT_REDUCTION_DECL(th, global_tid); |
3901 | |
3902 | if (packed_reduction_method == critical_reduce_block) { |
3903 | |
3904 | OMPT_REDUCTION_BEGIN; |
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3906 | retval = 1; |
3907 | |
3908 | } else if (packed_reduction_method == empty_reduce_block) { |
3909 | |
3910 | OMPT_REDUCTION_BEGIN; |
3911 | // usage: if team size == 1, no synchronization is required ( Intel |
3912 | // platforms only ) |
3913 | retval = 1; |
3914 | |
3915 | } else if (packed_reduction_method == atomic_reduce_block) { |
3916 | |
3917 | retval = 2; |
3918 | |
3919 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
3920 | tree_reduce_block)) { |
3921 | |
3922 | // case tree_reduce_block: |
3923 | // this barrier should be visible to a customer and to the threading profile |
3924 | // tool (it's a terminating barrier on constructs if NOWAIT not specified) |
3925 | #if OMPT_SUPPORT |
3926 | ompt_frame_t *ompt_frame; |
3927 | if (ompt_enabled.enabled) { |
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3929 | if (ompt_frame->enter_frame.ptr == NULL) |
3930 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
3931 | } |
3932 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
3933 | #endif |
3934 | #if USE_ITT_NOTIFY |
3935 | __kmp_threads[global_tid]->th.th_ident = |
3936 | loc; // needed for correct notification of frames |
3937 | #endif |
3938 | retval = |
3939 | __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), |
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3941 | retval = (retval != 0) ? (0) : (1); |
3942 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
3943 | if (ompt_enabled.enabled) { |
3944 | ompt_frame->enter_frame = ompt_data_none; |
3945 | } |
3946 | #endif |
3947 | |
3948 | // all other workers except primary thread should do this pop here |
3949 | // (none of other workers except primary will enter __kmpc_end_reduce()) |
3950 | if (__kmp_env_consistency_check) { |
3951 | if (retval == 0) { // 0: all other workers; 1: primary thread |
        __kmp_pop_sync(global_tid, ct_reduce, loc);
3953 | } |
3954 | } |
3955 | |
3956 | } else { |
3957 | |
3958 | // should never reach this block |
3959 | KMP_ASSERT(0); // "unexpected method" |
3960 | } |
3961 | if (teams_swapped) { |
3962 | __kmp_restore_swapped_teams(th, team, task_state); |
3963 | } |
3964 | |
3965 | KA_TRACE(10, |
3966 | ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n" , |
3967 | global_tid, packed_reduction_method, retval)); |
3968 | return retval; |
3969 | } |
3970 | |
3971 | /*! |
3972 | @ingroup SYNCHRONIZATION |
3973 | @param loc source location information |
3974 | @param global_tid global thread id. |
3975 | @param lck pointer to the unique lock data structure |
3976 | |
3977 | Finish the execution of a blocking reduce. |
3978 | The <tt>lck</tt> pointer must be the same as that used in the corresponding |
3979 | start function. |
3980 | */ |
3981 | void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, |
3982 | kmp_critical_name *lck) { |
3983 | |
3984 | PACKED_REDUCTION_METHOD_T packed_reduction_method; |
3985 | kmp_info_t *th; |
3986 | kmp_team_t *team; |
3987 | int teams_swapped = 0, task_state; |
3988 | |
3989 | KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n" , global_tid)); |
  __kmp_assert_valid_gtid(global_tid);
3991 | |
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3994 | |
3995 | packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); |
3996 | |
3997 | // this barrier should be visible to a customer and to the threading profile |
3998 | // tool (it's a terminating barrier on constructs if NOWAIT not specified) |
3999 | OMPT_REDUCTION_DECL(th, global_tid); |
4000 | |
4001 | if (packed_reduction_method == critical_reduce_block) { |
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
4003 | |
4004 | OMPT_REDUCTION_END; |
4005 | |
4006 | // TODO: implicit barrier: should be exposed |
4007 | #if OMPT_SUPPORT |
4008 | ompt_frame_t *ompt_frame; |
4009 | if (ompt_enabled.enabled) { |
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4011 | if (ompt_frame->enter_frame.ptr == NULL) |
4012 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
4013 | } |
4014 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
4015 | #endif |
4016 | #if USE_ITT_NOTIFY |
4017 | __kmp_threads[global_tid]->th.th_ident = loc; |
4018 | #endif |
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4020 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4021 | if (ompt_enabled.enabled) { |
4022 | ompt_frame->enter_frame = ompt_data_none; |
4023 | } |
4024 | #endif |
4025 | |
4026 | } else if (packed_reduction_method == empty_reduce_block) { |
4027 | |
4028 | OMPT_REDUCTION_END; |
4029 | |
4030 | // usage: if team size==1, no synchronization is required (Intel platforms only) |
4031 | |
4032 | // TODO: implicit barrier: should be exposed |
4033 | #if OMPT_SUPPORT |
4034 | ompt_frame_t *ompt_frame; |
4035 | if (ompt_enabled.enabled) { |
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4037 | if (ompt_frame->enter_frame.ptr == NULL) |
4038 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
4039 | } |
4040 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
4041 | #endif |
4042 | #if USE_ITT_NOTIFY |
4043 | __kmp_threads[global_tid]->th.th_ident = loc; |
4044 | #endif |
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4046 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4047 | if (ompt_enabled.enabled) { |
4048 | ompt_frame->enter_frame = ompt_data_none; |
4049 | } |
4050 | #endif |
4051 | |
4052 | } else if (packed_reduction_method == atomic_reduce_block) { |
4053 | |
4054 | #if OMPT_SUPPORT |
4055 | ompt_frame_t *ompt_frame; |
4056 | if (ompt_enabled.enabled) { |
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4058 | if (ompt_frame->enter_frame.ptr == NULL) |
4059 | ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
4060 | } |
4061 | OMPT_STORE_RETURN_ADDRESS(global_tid); |
4062 | #endif |
4063 | // TODO: implicit barrier: should be exposed |
4064 | #if USE_ITT_NOTIFY |
4065 | __kmp_threads[global_tid]->th.th_ident = loc; |
4066 | #endif |
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4068 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4069 | if (ompt_enabled.enabled) { |
4070 | ompt_frame->enter_frame = ompt_data_none; |
4071 | } |
4072 | #endif |
4073 | |
4074 | } else if (TEST_REDUCTION_METHOD(packed_reduction_method, |
4075 | tree_reduce_block)) { |
4076 | |
4077 | // only primary thread executes here (primary releases all other workers) |
4078 | __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), |
                            global_tid);
4080 | |
4081 | } else { |
4082 | |
4083 | // should never reach this block |
4084 | KMP_ASSERT(0); // "unexpected method" |
4085 | } |
4086 | if (teams_swapped) { |
4087 | __kmp_restore_swapped_teams(th, team, task_state); |
4088 | } |
4089 | |
4090 | if (__kmp_env_consistency_check) |
    __kmp_pop_sync(global_tid, ct_reduce, loc);
4092 | |
4093 | KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n" , |
4094 | global_tid, packed_reduction_method)); |
4095 | |
4096 | return; |
4097 | } |
4098 | |
4099 | #undef __KMP_GET_REDUCTION_METHOD |
4100 | #undef __KMP_SET_REDUCTION_METHOD |
4101 | |
4102 | /* end of interface to fast scalable reduce routines */ |
4103 | |
4104 | kmp_uint64 __kmpc_get_taskid() { |
4105 | |
4106 | kmp_int32 gtid; |
4107 | kmp_info_t *thread; |
4108 | |
4109 | gtid = __kmp_get_gtid(); |
4110 | if (gtid < 0) { |
4111 | return 0; |
4112 | } |
4113 | thread = __kmp_thread_from_gtid(gtid); |
4114 | return thread->th.th_current_task->td_task_id; |
4115 | |
4116 | } // __kmpc_get_taskid |
4117 | |
4118 | kmp_uint64 __kmpc_get_parent_taskid() { |
4119 | |
4120 | kmp_int32 gtid; |
4121 | kmp_info_t *thread; |
4122 | kmp_taskdata_t *parent_task; |
4123 | |
4124 | gtid = __kmp_get_gtid(); |
4125 | if (gtid < 0) { |
4126 | return 0; |
4127 | } |
4128 | thread = __kmp_thread_from_gtid(gtid); |
4129 | parent_task = thread->th.th_current_task->td_parent; |
4130 | return (parent_task == NULL ? 0 : parent_task->td_task_id); |
4131 | |
4132 | } // __kmpc_get_parent_taskid |
4133 | |
4134 | /*! |
4135 | @ingroup WORK_SHARING |
4136 | @param loc source location information. |
4137 | @param gtid global thread number. |
4138 | @param num_dims number of associated doacross loops. |
4139 | @param dims info on loops bounds. |
4140 | |
4141 | Initialize doacross loop information. |
Expect the compiler to send us inclusive bounds,
4143 | e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. |
4144 | */ |
4145 | void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, |
4146 | const struct kmp_dim *dims) { |
4147 | __kmp_assert_valid_gtid(gtid); |
4148 | int j, idx; |
4149 | kmp_int64 last, trace_count; |
4150 | kmp_info_t *th = __kmp_threads[gtid]; |
4151 | kmp_team_t *team = th->th.th_team; |
4152 | kmp_uint32 *flags; |
4153 | kmp_disp_t *pr_buf = th->th.th_dispatch; |
4154 | dispatch_shared_info_t *sh_buf; |
4155 | |
4156 | KA_TRACE( |
4157 | 20, |
4158 | ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n" , |
4159 | gtid, num_dims, !team->t.t_serialized)); |
4160 | KMP_DEBUG_ASSERT(dims != NULL); |
4161 | KMP_DEBUG_ASSERT(num_dims > 0); |
4162 | |
4163 | if (team->t.t_serialized) { |
4164 | KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n" )); |
4165 | return; // no dependencies if team is serialized |
4166 | } |
4167 | KMP_DEBUG_ASSERT(team->t.t_nproc > 1); |
4168 | idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for |
4169 | // the next loop |
4170 | sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; |
4171 | |
4172 | // Save bounds info into allocated private buffer |
4173 | KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); |
4174 | pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( |
4175 | th, sizeof(kmp_int64) * (4 * num_dims + 1)); |
4176 | KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); |
4177 | pr_buf->th_doacross_info[0] = |
4178 | (kmp_int64)num_dims; // first element is number of dimensions |
4179 | // Save also address of num_done in order to access it later without knowing |
4180 | // the buffer index |
4181 | pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; |
4182 | pr_buf->th_doacross_info[2] = dims[0].lo; |
4183 | pr_buf->th_doacross_info[3] = dims[0].up; |
4184 | pr_buf->th_doacross_info[4] = dims[0].st; |
4185 | last = 5; |
4186 | for (j = 1; j < num_dims; ++j) { |
4187 | kmp_int64 |
4188 | range_length; // To keep ranges of all dimensions but the first dims[0] |
4189 | if (dims[j].st == 1) { // most common case |
4190 | // AC: should we care of ranges bigger than LLONG_MAX? (not for now) |
4191 | range_length = dims[j].up - dims[j].lo + 1; |
4192 | } else { |
4193 | if (dims[j].st > 0) { |
4194 | KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); |
4195 | range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; |
4196 | } else { // negative increment |
4197 | KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); |
4198 | range_length = |
4199 | (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; |
4200 | } |
4201 | } |
4202 | pr_buf->th_doacross_info[last++] = range_length; |
4203 | pr_buf->th_doacross_info[last++] = dims[j].lo; |
4204 | pr_buf->th_doacross_info[last++] = dims[j].up; |
4205 | pr_buf->th_doacross_info[last++] = dims[j].st; |
4206 | } |
4207 | |
4208 | // Compute total trip count. |
4209 | // Start with range of dims[0] which we don't need to keep in the buffer. |
4210 | if (dims[0].st == 1) { // most common case |
4211 | trace_count = dims[0].up - dims[0].lo + 1; |
4212 | } else if (dims[0].st > 0) { |
4213 | KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); |
4214 | trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; |
4215 | } else { // negative increment |
4216 | KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); |
4217 | trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; |
4218 | } |
4219 | for (j = 1; j < num_dims; ++j) { |
4220 | trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges |
4221 | } |
4222 | KMP_DEBUG_ASSERT(trace_count > 0); |
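  // Worked example (illustrative): for a 2-D nest with dims[0] covering
  // i=2..8 step 2 (4 iterations, not stored) and dims[1] covering j=0..4
  // step 1 (range_length 5 stored above), trace_count = 4 * 5 = 20.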
4223 | |
4224 | // Check if shared buffer is not occupied by other loop (idx - |
4225 | // __kmp_dispatch_num_buffers) |
4226 | if (idx != sh_buf->doacross_buf_idx) { |
4227 | // Shared buffer is occupied, wait for it to be free |
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
4230 | } |
4231 | #if KMP_32_BIT_ARCH |
4232 | // Check if we are the first thread. After the CAS the first thread gets 0, |
4233 | // others get 1 if initialization is in progress, allocated pointer otherwise. |
4234 | // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. |
4235 | flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( |
4236 | (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); |
4237 | #else |
4238 | flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( |
4239 | (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); |
4240 | #endif |
4241 | if (flags == NULL) { |
4242 | // we are the first thread, allocate the array of flags |
4243 | size_t size = |
4244 | (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration |
4245 | flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); |
4246 | KMP_MB(); |
4247 | sh_buf->doacross_flags = flags; |
4248 | } else if (flags == (kmp_uint32 *)1) { |
4249 | #if KMP_32_BIT_ARCH |
4250 | // initialization is still in progress, need to wait |
4251 | while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) |
4252 | #else |
4253 | while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) |
4254 | #endif |
4255 | KMP_YIELD(TRUE); |
4256 | KMP_MB(); |
4257 | } else { |
4258 | KMP_MB(); |
4259 | } |
4260 | KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value |
4261 | pr_buf->th_doacross_flags = |
4262 | sh_buf->doacross_flags; // save private copy in order to not |
4263 | // touch shared buffer on each iteration |
4264 | KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n" , gtid)); |
4265 | } |
4266 | |
4267 | void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { |
4268 | __kmp_assert_valid_gtid(gtid); |
4269 | kmp_int64 shft; |
4270 | size_t num_dims, i; |
4271 | kmp_uint32 flag; |
4272 | kmp_int64 iter_number; // iteration number of "collapsed" loop nest |
4273 | kmp_info_t *th = __kmp_threads[gtid]; |
4274 | kmp_team_t *team = th->th.th_team; |
4275 | kmp_disp_t *pr_buf; |
4276 | kmp_int64 lo, up, st; |
4277 | |
4278 | KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n" , gtid)); |
4279 | if (team->t.t_serialized) { |
4280 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n" )); |
4281 | return; // no dependencies if team is serialized |
4282 | } |
4283 | |
4284 | // calculate sequential iteration number and check out-of-bounds condition |
4285 | pr_buf = th->th.th_dispatch; |
4286 | KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); |
4287 | num_dims = (size_t)pr_buf->th_doacross_info[0]; |
4288 | lo = pr_buf->th_doacross_info[2]; |
4289 | up = pr_buf->th_doacross_info[3]; |
4290 | st = pr_buf->th_doacross_info[4]; |
4291 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4292 | SimpleVLA<ompt_dependence_t> deps(num_dims); |
4293 | #endif |
4294 | if (st == 1) { // most common case |
4295 | if (vec[0] < lo || vec[0] > up) { |
4296 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4297 | "bounds [%lld,%lld]\n" , |
4298 | gtid, vec[0], lo, up)); |
4299 | return; |
4300 | } |
4301 | iter_number = vec[0] - lo; |
4302 | } else if (st > 0) { |
4303 | if (vec[0] < lo || vec[0] > up) { |
4304 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4305 | "bounds [%lld,%lld]\n" , |
4306 | gtid, vec[0], lo, up)); |
4307 | return; |
4308 | } |
4309 | iter_number = (kmp_uint64)(vec[0] - lo) / st; |
4310 | } else { // negative increment |
4311 | if (vec[0] > lo || vec[0] < up) { |
4312 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4313 | "bounds [%lld,%lld]\n" , |
4314 | gtid, vec[0], lo, up)); |
4315 | return; |
4316 | } |
4317 | iter_number = (kmp_uint64)(lo - vec[0]) / (-st); |
4318 | } |
4319 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4320 | deps[0].variable.value = iter_number; |
4321 | deps[0].dependence_type = ompt_dependence_type_sink; |
4322 | #endif |
4323 | for (i = 1; i < num_dims; ++i) { |
4324 | kmp_int64 iter, ln; |
4325 | size_t j = i * 4; |
4326 | ln = pr_buf->th_doacross_info[j + 1]; |
4327 | lo = pr_buf->th_doacross_info[j + 2]; |
4328 | up = pr_buf->th_doacross_info[j + 3]; |
4329 | st = pr_buf->th_doacross_info[j + 4]; |
4330 | if (st == 1) { |
4331 | if (vec[i] < lo || vec[i] > up) { |
4332 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4333 | "bounds [%lld,%lld]\n" , |
4334 | gtid, vec[i], lo, up)); |
4335 | return; |
4336 | } |
4337 | iter = vec[i] - lo; |
4338 | } else if (st > 0) { |
4339 | if (vec[i] < lo || vec[i] > up) { |
4340 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4341 | "bounds [%lld,%lld]\n" , |
4342 | gtid, vec[i], lo, up)); |
4343 | return; |
4344 | } |
4345 | iter = (kmp_uint64)(vec[i] - lo) / st; |
4346 | } else { // st < 0 |
4347 | if (vec[i] > lo || vec[i] < up) { |
4348 | KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " |
4349 | "bounds [%lld,%lld]\n" , |
4350 | gtid, vec[i], lo, up)); |
4351 | return; |
4352 | } |
4353 | iter = (kmp_uint64)(lo - vec[i]) / (-st); |
4354 | } |
4355 | iter_number = iter + ln * iter_number; |
4356 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4357 | deps[i].variable.value = iter; |
4358 | deps[i].dependence_type = ompt_dependence_type_sink; |
4359 | #endif |
4360 | } |
4361 | shft = iter_number % 32; // use 32-bit granularity |
4362 | iter_number >>= 5; // divided by 32 |
4363 | flag = 1 << shft; |
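  // Example (illustrative): linearized iteration 37 checks bit 37 % 32 == 5
  // of flags word 37 >> 5 == 1, i.e. (1 << 5) & pr_buf->th_doacross_flags[1].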
4364 | while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { |
4365 | KMP_YIELD(TRUE); |
4366 | } |
4367 | KMP_MB(); |
4368 | #if OMPT_SUPPORT && OMPT_OPTIONAL |
4369 | if (ompt_enabled.ompt_callback_dependences) { |
4370 | ompt_callbacks.ompt_callback(ompt_callback_dependences)( |
4371 | &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); |
4372 | } |
4373 | #endif |
4374 | KA_TRACE(20, |
4375 | ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n" , |
4376 | gtid, (iter_number << 5) + shft)); |
4377 | } |

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
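
/* Illustrative sketch only (not emitted from this file): for a doacross loop
   such as

     #pragma omp for ordered(2)
     for (i = 1; i < N; ++i)
       for (j = 0; j < M; ++j) {
         #pragma omp ordered depend(sink : i - 1, j)
         ... use results of iteration (i - 1, j) ...
         #pragma omp ordered depend(source)
       }

   a compiler is expected to lower the depend clauses to the entry points
   above, passing the iteration vector in loop order. Assuming the doacross
   initialization done earlier in this file has already recorded the loop
   bounds and strides in th_doacross_info, the emitted calls look roughly like:

     kmp_int64 vec[2];
     vec[0] = i - 1; vec[1] = j;            // depend(sink : i - 1, j)
     __kmpc_doacross_wait(&loc, gtid, vec);
     ...
     vec[0] = i; vec[1] = j;                // depend(source)
     __kmpc_doacross_post(&loc, gtid, vec);
     // once per thread, after the worksharing loop:
     __kmpc_doacross_fini(&loc, gtid);
*/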

/* OpenMP 5.1 Memory Management routines */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
/* end of OpenMP 5.1 Memory Management routines */
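
/* Illustrative usage sketch (user code, not part of the runtime): the thin
   wrappers above forward to the internal __kmp_* allocator machinery, so they
   are exercised through the standard OpenMP allocator API, e.g.

     double *a = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                             omp_default_mem_alloc);
     double *b = (double *)omp_calloc(n, sizeof(double), omp_default_mem_alloc);
     a = (double *)omp_realloc(a, 2 * n * sizeof(double),
                               omp_default_mem_alloc, omp_default_mem_alloc);
     omp_free(b, omp_default_mem_alloc);
     omp_free(a, omp_default_mem_alloc);
*/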

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}

void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}
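
/* Illustrative sketch only: a compiler is expected to lower the OpenMP `error`
   directive to the entry point above. For example,

     #pragma omp error severity(warning) message("deprecated code path")

   becomes, roughly,

     __kmpc_error(&loc, severity_warning, "deprecated code path");

   with severity_fatal used for severity(fatal).
*/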

// Mark begin of scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark end of scope directive.
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}
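
/* Illustrative sketch only: for

     #pragma omp scope
     { ... }

   a compiler is expected to bracket the structured block with the two entry
   points above (the reserved argument is currently unused), so that OMPT
   work-scope begin/end events are reported even though the runtime performs
   no other work here:

     __kmpc_scope(&loc, gtid, NULL);
     ... structured block ...
     __kmpc_end_scope(&loc, gtid, NULL);
*/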

#ifdef KMP_USE_VERSION_SYMBOLS
// For GOMP compatibility there are two versions of each omp_* API.
// One is the plain C symbol and one is the Fortran symbol with an appended
// underscore. When we implement a specific ompc_* version of an omp_*
// function, we want the plain GOMP versioned symbol to alias the ompc_*
// version instead of the Fortran versions in kmp_ftn_entry.h.
extern "C" {
// Have to undef these from omp.h so they aren't translated into
// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below.
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
} // extern "C"
#endif
