/*
 * ompt-specific.cpp -- OMPT internal functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//******************************************************************************
// include files
//******************************************************************************

#include "kmp.h"
#include "ompt-specific.h"

#if KMP_OS_UNIX
#include <dlfcn.h>
#endif

#if KMP_OS_WINDOWS
#define THREAD_LOCAL __declspec(thread)
#else
#define THREAD_LOCAL __thread
#endif

#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE_INTERNAL

//******************************************************************************
// macros
//******************************************************************************

#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info
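
// Number of high-order bits of a unique id that identify the generating
// thread; the remaining low-order bits form a per-thread counter (see
// __ompt_get_unique_id_internal below).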
#define OMPT_THREAD_ID_BITS 16

//******************************************************************************
// private operations
//******************************************************************************

//----------------------------------------------------------
// traverse the team and task hierarchy
// note: __ompt_get_teaminfo and __ompt_get_task_info_object
//       traverse the hierarchy similarly and need to be
//       kept consistent
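//
// example (a hedged sketch, not code from this file): for
//   #pragma omp parallel          // level 1, active team
//    #pragma omp parallel         // level 2, serialized
//     #pragma omp parallel        // level 3, serialized
// a query made from inside level 3 walks the lightweight (serialized)
// teams first and the heavyweight kmp_teams afterwards, so depth 0
// resolves to level 3, depth 1 to level 2, and depth 2 to level 1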
//----------------------------------------------------------

ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
  kmp_info_t *thr = ompt_get_thread();

  if (thr) {
    kmp_team *team = thr->th.th_team;
    if (team == NULL)
      return NULL;

    ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL;

    while (depth > 0) {
      // next lightweight team (if any)
      if (lwt)
        lwt = lwt->parent;

      // next heavyweight team (if any) after
      // lightweight teams are exhausted
      if (!lwt && team) {
        if (next_lwt) {
          lwt = next_lwt;
          next_lwt = NULL;
        } else {
          team = team->t.t_parent;
          if (team) {
            next_lwt = LWT_FROM_TEAM(team);
          }
        }
      }

      depth--;
    }

    if (lwt) {
      // lightweight teams have one task
      if (size)
        *size = 1;

      // return team info for lightweight team
      return &lwt->ompt_team_info;
    } else if (team) {
      // extract size from heavyweight team
      if (size)
        *size = team->t.t_nproc;

      // return team info for heavyweight team
      return &team->t.ompt_team_info;
    }
  }

  return NULL;
}

ompt_task_info_t *__ompt_get_task_info_object(int depth) {
  ompt_task_info_t *info = NULL;
  kmp_info_t *thr = ompt_get_thread();

  if (thr) {
    kmp_taskdata_t *taskdata = thr->th.th_current_task;
    ompt_lw_taskteam_t *lwt = NULL,
                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);

    while (depth > 0) {
      // next lightweight team (if any)
      if (lwt)
        lwt = lwt->parent;

      // next heavyweight team (if any) after
      // lightweight teams are exhausted
      if (!lwt && taskdata) {
        if (next_lwt) {
          lwt = next_lwt;
          next_lwt = NULL;
        } else {
          taskdata = taskdata->td_parent;
          if (taskdata) {
            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
          }
        }
      }
      depth--;
    }

    if (lwt) {
      info = &lwt->ompt_task_info;
    } else if (taskdata) {
      info = &taskdata->ompt_task_info;
    }
  }

  return info;
}

ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) {
  ompt_task_info_t *info = NULL;
  kmp_info_t *thr = ompt_get_thread();

  if (thr) {
    kmp_taskdata_t *taskdata = thr->th.th_current_task;

    ompt_lw_taskteam_t *lwt = NULL,
                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);

    while (depth > 0) {
      // next lightweight team (if any)
      if (lwt)
        lwt = lwt->parent;

      // next heavyweight team (if any) after
      // lightweight teams are exhausted
      if (!lwt && taskdata) {
        // first try scheduling parent (for explicit task scheduling)
        if (taskdata->ompt_task_info.scheduling_parent) {
          taskdata = taskdata->ompt_task_info.scheduling_parent;
        } else if (next_lwt) {
          lwt = next_lwt;
          next_lwt = NULL;
        } else {
          // then go for implicit tasks
          taskdata = taskdata->td_parent;
          if (taskdata) {
            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
          }
        }
      }
      depth--;
    }

    if (lwt) {
      info = &lwt->ompt_task_info;
    } else if (taskdata) {
      info = &taskdata->ompt_task_info;
    }
  }

  return info;
}

//******************************************************************************
// interface operations
//******************************************************************************
//----------------------------------------------------------
// initialization support
//----------------------------------------------------------

void __ompt_force_initialization() { __kmp_serial_initialize(); }

//----------------------------------------------------------
// thread support
//----------------------------------------------------------

ompt_data_t *__ompt_get_thread_data_internal() {
  if (__kmp_get_gtid() >= 0) {
    kmp_info_t *thread = ompt_get_thread();
    if (thread == NULL)
      return NULL;
    return &(thread->th.ompt_thread_info.thread_data);
  }
  return NULL;
}
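
// Tool-side view (a hedged sketch, not part of this file): a tool typically
// caches the ompt_get_thread_data entry point obtained from lookup() in its
// ompt_initialize callback and uses it to attach per-thread state, e.g.
//   ompt_data_t *d = ompt_get_thread_data();
//   if (d)
//     d->ptr = my_tool_thread_state; // my_tool_thread_state is hypothetical
// Calls made from threads the runtime does not manage take the NULL path
// above.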

//----------------------------------------------------------
// state support
//----------------------------------------------------------

void __ompt_thread_assign_wait_id(void *variable) {
  kmp_info_t *ti = ompt_get_thread();

  if (ti)
    ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)(uintptr_t)variable;
}
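
// Report the current OMPT state of the calling thread and, if requested, the
// wait id it last recorded; threads unknown to the runtime report
// ompt_state_undefined.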
int __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) {
  kmp_info_t *ti = ompt_get_thread();

  if (ti) {
    if (omp_wait_id)
      *omp_wait_id = ti->th.ompt_thread_info.wait_id;
    return ti->th.ompt_thread_info.state;
  }
  return ompt_state_undefined;
}

//----------------------------------------------------------
// parallel region support
//----------------------------------------------------------

int __ompt_get_parallel_info_internal(int ancestor_level,
                                      ompt_data_t **parallel_data,
                                      int *team_size) {
  if (__kmp_get_gtid() >= 0) {
    ompt_team_info_t *info;
    if (team_size) {
      info = __ompt_get_teaminfo(ancestor_level, team_size);
    } else {
      info = __ompt_get_teaminfo(ancestor_level, NULL);
    }
    if (parallel_data) {
      *parallel_data = info ? &(info->parallel_data) : NULL;
    }
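    // 2 == "a parallel region exists at this ancestor level and its handle is
    // available", matching the return values specified for
    // ompt_get_parallel_info; 0 == no such region.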
    return info ? 2 : 0;
  } else {
    return 0;
  }
}

//----------------------------------------------------------
// lightweight task team support
//----------------------------------------------------------
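// A "lightweight task team" stands in for a serialized (nested) parallel
// region: rather than allocating a full kmp_team, the runtime records only
// the OMPT team and task information so that ancestor queries still see the
// region.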

void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
                             ompt_data_t *ompt_pid, void *codeptr) {
  // initialize parallel_data with input, return address to parallel_data on
  // exit
  lwt->ompt_team_info.parallel_data = *ompt_pid;
  lwt->ompt_team_info.master_return_address = codeptr;
  lwt->ompt_task_info.task_data.value = 0;
  lwt->ompt_task_info.frame.enter_frame = ompt_data_none;
  lwt->ompt_task_info.frame.exit_frame = ompt_data_none;
  lwt->ompt_task_info.frame.enter_frame_flags = OMPT_FRAME_FLAGS_RUNTIME;
  lwt->ompt_task_info.frame.exit_frame_flags = OMPT_FRAME_FLAGS_RUNTIME;
  lwt->ompt_task_info.scheduling_parent = NULL;
  lwt->heap = 0;
  lwt->parent = 0;
}

void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
                             int on_heap, bool always) {
  ompt_lw_taskteam_t *link_lwt = lwt;
  if (always ||
      thr->th.th_team->t.t_serialized >
          1) { // we already have a team, so link the new team and swap values
    if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap
      link_lwt =
          (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
    }
    link_lwt->heap = on_heap;

    // this amounts to a swap when the lw_taskteam stays on the stack
    // (on_stack case).
    ompt_team_info_t tmp_team = lwt->ompt_team_info;
    link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
    *OMPT_CUR_TEAM_INFO(thr) = tmp_team;

    // link the taskteam into the list of taskteams:
    ompt_lw_taskteam_t *my_parent =
        thr->th.th_team->t.ompt_serialized_team_info;
    link_lwt->parent = my_parent;
    thr->th.th_team->t.ompt_serialized_team_info = link_lwt;
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP) {
      ompd_bp_parallel_begin();
    }
#endif

    ompt_task_info_t tmp_task = lwt->ompt_task_info;
    link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
    *OMPT_CUR_TASK_INFO(thr) = tmp_task;
  } else {
    // this is the first serialized team, so we just store the values in the
    // team and drop the taskteam-object
    *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info;
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP) {
      ompd_bp_parallel_begin();
    }
#endif
    *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info;
  }
}

void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
  ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
  if (lwtask) {
    ompt_task_info_t tmp_task = lwtask->ompt_task_info;
    lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
    *OMPT_CUR_TASK_INFO(thr) = tmp_task;
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP) {
      ompd_bp_parallel_end();
    }
#endif
    thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;

    ompt_team_info_t tmp_team = lwtask->ompt_team_info;
    lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
    *OMPT_CUR_TEAM_INFO(thr) = tmp_team;

    if (lwtask->heap) {
      __kmp_free(lwtask);
      lwtask = NULL;
    }
  }
  // return lwtask;
}
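
// Typical pairing of the three routines above (a hedged sketch; the actual
// call sites live elsewhere in the runtime, around serialized parallel
// regions):
//   ompt_lw_taskteam_t lw_taskteam;
//   __ompt_lw_taskteam_init(&lw_taskteam, thr, gtid, &parallel_data, codeptr);
//   __ompt_lw_taskteam_link(&lw_taskteam, thr, 1, false);
//   ... the serialized region executes ...
//   __ompt_lw_taskteam_unlink(thr);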

//----------------------------------------------------------
// task support
//----------------------------------------------------------

ompt_data_t *__ompt_get_task_data() {
  kmp_info_t *thr = ompt_get_thread();
  ompt_data_t *task_data = thr ? OMPT_CUR_TASK_DATA(thr) : NULL;
  return task_data;
}

ompt_data_t *__ompt_get_target_task_data() {
  return &__kmp_threads[__kmp_get_gtid()]->th.ompt_thread_info.target_task_data;
}

int __ompt_get_task_info_internal(int ancestor_level, int *type,
                                  ompt_data_t **task_data,
                                  ompt_frame_t **task_frame,
                                  ompt_data_t **parallel_data,
                                  int *thread_num) {
  if (__kmp_get_gtid() < 0)
    return 0;

  if (ancestor_level < 0)
    return 0;

  // copied from __ompt_get_scheduling_taskinfo
  ompt_task_info_t *info = NULL;
  ompt_team_info_t *team_info = NULL;
  kmp_info_t *thr = ompt_get_thread();
  int level = ancestor_level;

  if (thr) {
    kmp_taskdata_t *taskdata = thr->th.th_current_task;
    if (taskdata == NULL)
      return 0;
    kmp_team *team = thr->th.th_team, *prev_team = NULL;
    if (team == NULL)
      return 0;
    ompt_lw_taskteam_t *lwt = NULL,
                       *next_lwt = LWT_FROM_TEAM(taskdata->td_team);

    while (ancestor_level > 0) {
      // next lightweight team (if any)
      if (lwt)
        lwt = lwt->parent;

      // next heavyweight team (if any) after
      // lightweight teams are exhausted
      if (!lwt && taskdata) {
        // first try scheduling parent (for explicit task scheduling)
        if (taskdata->ompt_task_info.scheduling_parent) {
          taskdata = taskdata->ompt_task_info.scheduling_parent;
        } else if (next_lwt) {
          lwt = next_lwt;
          next_lwt = NULL;
        } else {
          // then go for implicit tasks
          taskdata = taskdata->td_parent;
          if (team == NULL)
            return 0;
          prev_team = team;
          team = team->t.t_parent;
          if (taskdata) {
            next_lwt = LWT_FROM_TEAM(taskdata->td_team);
          }
        }
      }
      ancestor_level--;
    }

    if (lwt) {
      info = &lwt->ompt_task_info;
      team_info = &lwt->ompt_team_info;
      if (type) {
        *type = ompt_task_implicit;
      }
    } else if (taskdata) {
      info = &taskdata->ompt_task_info;
      team_info = &team->t.ompt_team_info;
      if (type) {
        if (taskdata->td_parent) {
          *type = TASK_TYPE_DETAILS_FORMAT(taskdata);
        } else {
          *type = ompt_task_initial;
        }
      }
    }
    if (task_data) {
      *task_data = info ? &info->task_data : NULL;
    }
    if (task_frame) {
      // OpenMP spec asks for the scheduling task to be returned.
      *task_frame = info ? &info->frame : NULL;
    }
    if (parallel_data) {
      *parallel_data = team_info ? &(team_info->parallel_data) : NULL;
    }
    if (thread_num) {
      if (level == 0)
        *thread_num = __kmp_get_tid();
      else if (lwt)
        *thread_num = 0;
      else if (!prev_team) {
        // The innermost parallel region contains at least one explicit task.
        // The task at level > 0 is either an implicit task that
        // corresponds to the mentioned region or one of the explicit tasks
        // nested inside the same region. Note that the task isn't the
        // innermost explicit task (because of condition level > 0).
        // Since the task at this level still belongs to the innermost parallel
        // region, thread_num is determined the same way as for level==0.
        *thread_num = __kmp_get_tid();
      } else
        *thread_num = prev_team->t.t_master_tid;
      // *thread_num = team->t.t_master_tid;
    }
    return info ? 2 : 0;
  }
  return 0;
}
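
// Tool-side view (a hedged sketch, not part of this file): a tool walks the
// task ancestry with the ompt_get_task_info entry point, which dispatches
// here:
//   int flags, thread_num;
//   ompt_data_t *task_data, *parallel_data;
//   ompt_frame_t *frame;
//   for (int level = 0;
//        ompt_get_task_info(level, &flags, &task_data, &frame,
//                           &parallel_data, &thread_num) == 2;
//        ++level) {
//     // inspect one ancestor task per iteration
//   }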

int __ompt_get_task_memory_internal(void **addr, size_t *size, int blocknum) {
  *size = 0;
  if (blocknum != 0)
    return 0; // support only a single block

  kmp_info_t *thr = ompt_get_thread();
  if (!thr)
    return 0;

  kmp_taskdata_t *taskdata = thr->th.th_current_task;

  if (taskdata->td_flags.tasktype != TASK_EXPLICIT)
    return 0; // support only explicit task

  *addr = taskdata;
  *size = taskdata->td_size_alloc;
  return 0;
}

//----------------------------------------------------------
// team support
//----------------------------------------------------------

void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) {
  team->t.ompt_team_info.parallel_data = ompt_pid;
}

//----------------------------------------------------------
// misc
//----------------------------------------------------------
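
// Unique ids are partitioned per thread: the first request on a thread claims
// a fresh ordinal from the global counter and places it in the top
// OMPT_THREAD_ID_BITS bits; later requests just bump the thread-local
// low-order bits, so no further synchronization is needed.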
static uint64_t __ompt_get_unique_id_internal() {
  static uint64_t thread = 1;
  static THREAD_LOCAL uint64_t ID = 0;
  if (ID == 0) {
    uint64_t new_thread = KMP_TEST_THEN_INC64((kmp_int64 *)&thread);
    ID = new_thread << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS);
  }
  return ++ID;
}
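
// Map an internal barrier type onto the OMPT sync-region kind reported to
// tools: fork/join barriers become implicit-parallel (or teams) barriers,
// plain barriers are classified as explicit or implicit-workshare from the
// ident flags of the call site, and anything else is reported as an
// implementation barrier.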
ompt_sync_region_t __ompt_get_barrier_kind(enum barrier_type bt,
                                           kmp_info_t *thr) {
  if (bt == bs_forkjoin_barrier) {
    if (thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      return ompt_sync_region_barrier_teams;
    else
      return ompt_sync_region_barrier_implicit_parallel;
  }

  if (bt != bs_plain_barrier || !thr->th.th_ident)
    return ompt_sync_region_barrier_implementation;

  kmp_int32 flags = thr->th.th_ident->flags;

  if ((flags & KMP_IDENT_BARRIER_EXPL) != 0)
    return ompt_sync_region_barrier_explicit;

  if ((flags & KMP_IDENT_BARRIER_IMPL) != 0)
    return ompt_sync_region_barrier_implicit_workshare;

  return ompt_sync_region_barrier_implementation;
}