1 | #if USE_ITT_BUILD |
2 | /* |
3 | * kmp_itt.h -- ITT Notify interface. |
4 | */ |
5 | |
6 | //===----------------------------------------------------------------------===// |
7 | // |
8 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
9 | // See https://llvm.org/LICENSE.txt for license information. |
10 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef KMP_ITT_H |
15 | #define KMP_ITT_H |
16 | |
17 | #include "kmp_lock.h" |
18 | |
19 | #define INTEL_ITTNOTIFY_API_PRIVATE |
20 | #include "ittnotify.h" |
21 | #include "legacy/ittnotify.h" |
22 | |
23 | #if KMP_DEBUG |
24 | #define __kmp_inline // Turn off inlining in debug mode. |
25 | #else |
26 | #define __kmp_inline static inline |
27 | #endif |
28 | |
29 | #if USE_ITT_NOTIFY |
30 | extern kmp_int32 __kmp_itt_prepare_delay; |
31 | #ifdef __cplusplus |
32 | extern "C" void __kmp_itt_fini_ittlib(void); |
33 | #else |
34 | extern void __kmp_itt_fini_ittlib(void); |
35 | #endif |
36 | #endif |
37 | |
38 | // Simplify the handling of an argument that is only required when USE_ITT_BUILD |
39 | // is enabled. |
40 | #define USE_ITT_BUILD_ARG(x) , x |
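// For example (a sketch; the function name is hypothetical), a declaration
// written as
//   void __kmp_do_wait(int gtid USE_ITT_BUILD_ARG(void *itt_sync_obj));
// expands to
//   void __kmp_do_wait(int gtid, void *itt_sync_obj);
// when USE_ITT_BUILD is enabled; otherwise the extra parameter disappears
// entirely (see the empty definition of USE_ITT_BUILD_ARG at the end of this
// file).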
41 | |
42 | void __kmp_itt_initialize(); |
43 | void __kmp_itt_destroy(); |
44 | void __kmp_itt_reset(); |
45 | |
46 | // ----------------------------------------------------------------------------- |
47 | // New stuff for reporting high-level constructs. |
48 | |
// Note the naming convention:
// a __kmp_itt_xxxing() function should be called before the action, while
// the matching __kmp_itt_xxxed() function should be called after the action.
52 | |
53 | // --- Parallel region reporting --- |
54 | __kmp_inline void |
55 | __kmp_itt_region_forking(int gtid, int team_size, |
56 | int barriers); // Primary only, before forking threads. |
57 | __kmp_inline void |
58 | __kmp_itt_region_joined(int gtid); // Primary only, after joining threads. |
59 | // (*) Note: A thread may execute tasks after this point, though. |
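
// Typical call order on the primary thread (an illustrative sketch):
//   __kmp_itt_region_forking(gtid, team_size, barriers); // before the fork
//   ... team executes the parallel region ...
//   __kmp_itt_region_joined(gtid);                       // after the join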
60 | |
61 | // --- Frame reporting --- |
62 | // region=0: no regions, region=1: parallel, region=2: serialized parallel |
63 | __kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin, |
64 | __itt_timestamp end, int imbalance, |
65 | ident_t *loc, int team_size, |
66 | int region = 0); |
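// Example (an illustrative sketch; timestamps are assumed to come from
// ittnotify's __itt_get_timestamp()):
//   __itt_timestamp t0 = __itt_get_timestamp();
//   ... the region executes ...
//   __itt_timestamp t1 = __itt_get_timestamp();
//   __kmp_itt_frame_submit(gtid, t0, t1, 0 /* imbalance */, loc, team_size,
//                          1 /* parallel region */);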
67 | |
68 | // --- Metadata reporting --- |
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - whether this is a reduction barrier
71 | __kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin, |
72 | kmp_uint64 end, |
73 | kmp_uint64 imbalance, |
74 | kmp_uint64 reduction); |
75 | // sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); |
76 | // iterations - loop trip count, chunk - chunk size |
77 | __kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type, |
78 | kmp_uint64 iterations, |
79 | kmp_uint64 chunk); |
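// Example (illustrative): record metadata for a dynamically scheduled loop
// of 1000 iterations with chunk size 4:
//   __kmp_itt_metadata_loop(loc, 1 /* dynamic */, 1000, 4);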
80 | __kmp_inline void __kmp_itt_metadata_single(ident_t *loc); |
81 | |
82 | // --- Barrier reporting --- |
83 | __kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0, |
84 | int delta = 0); |
85 | __kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object); |
86 | __kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object); |
87 | __kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object); |
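// Typical call order for one barrier (an illustrative sketch; bt identifies
// the barrier type, e.g. bs_plain_barrier):
//   void *obj = __kmp_itt_barrier_object(gtid, bt);
//   __kmp_itt_barrier_starting(gtid, obj); // before waiting starts
//   __kmp_itt_barrier_middle(gtid, obj);   // between gather and release
//   __kmp_itt_barrier_finished(gtid, obj); // after the barrier completes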
88 | |
89 | // --- Taskwait reporting --- |
90 | __kmp_inline void *__kmp_itt_taskwait_object(int gtid); |
91 | __kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object); |
92 | __kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object); |
93 | #define KMP_ITT_TASKWAIT_STARTING(obj) \ |
94 | if (UNLIKELY(__itt_sync_create_ptr)) { \ |
95 | obj = __kmp_itt_taskwait_object(gtid); \ |
96 | if (obj != NULL) { \ |
97 | __kmp_itt_taskwait_starting(gtid, obj); \ |
98 | } \ |
99 | } |
100 | #define KMP_ITT_TASKWAIT_FINISHED(obj) \ |
101 | if (UNLIKELY(obj != NULL)) \ |
102 | __kmp_itt_taskwait_finished(gtid, obj); |
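// Usage sketch (illustrative): a taskwait implementation would bracket the
// wait as
//   void *itt_sync_obj = NULL;
//   KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
//   ... wait for child tasks to complete ...
//   KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
// Note that both macros expect a variable named gtid to be visible at the
// expansion site.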
103 | |
104 | // --- Task reporting --- |
105 | __kmp_inline void __kmp_itt_task_starting(void *object); |
106 | __kmp_inline void __kmp_itt_task_finished(void *object); |
107 | |
108 | // --- Lock reporting --- |
109 | #if KMP_USE_DYNAMIC_LOCK |
110 | __kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock, |
111 | const ident_t *); |
112 | #else |
113 | __kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock); |
114 | #endif |
115 | __kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock); |
116 | __kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock); |
117 | __kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock); |
118 | __kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock); |
119 | __kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock); |
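// Typical call order around one acquisition (an illustrative sketch):
//   __kmp_itt_lock_acquiring(lck); // about to contend for the lock
//   ... the lock is taken (or the attempt is abandoned, see _cancelled) ...
//   __kmp_itt_lock_acquired(lck);  // the lock is now held
//   ...
//   __kmp_itt_lock_releasing(lck); // just before the lock is released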
120 | |
121 | // --- Critical reporting --- |
122 | #if KMP_USE_DYNAMIC_LOCK |
123 | __kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock, |
124 | const ident_t *); |
125 | #else |
126 | __kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock); |
127 | #endif |
128 | __kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock); |
129 | __kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock); |
130 | __kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock); |
131 | __kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock); |
132 | |
133 | // --- Single reporting --- |
134 | __kmp_inline void __kmp_itt_single_start(int gtid); |
135 | __kmp_inline void __kmp_itt_single_end(int gtid); |
136 | |
137 | // --- Ordered reporting --- |
138 | __kmp_inline void __kmp_itt_ordered_init(int gtid); |
139 | __kmp_inline void __kmp_itt_ordered_prep(int gtid); |
140 | __kmp_inline void __kmp_itt_ordered_start(int gtid); |
141 | __kmp_inline void __kmp_itt_ordered_end(int gtid); |
142 | |
143 | // --- Threads reporting --- |
144 | __kmp_inline void __kmp_itt_thread_ignore(); |
145 | __kmp_inline void __kmp_itt_thread_name(int gtid); |
146 | |
147 | // --- System objects --- |
148 | __kmp_inline void __kmp_itt_system_object_created(void *object, |
149 | char const *name); |
150 | |
151 | // --- Stack stitching --- |
152 | __kmp_inline __itt_caller __kmp_itt_stack_caller_create(void); |
153 | __kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller); |
154 | __kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller); |
155 | __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller); |
156 | |
157 | // ----------------------------------------------------------------------------- |
158 | // Old stuff for reporting low-level internal synchronization. |
159 | |
160 | #if USE_ITT_NOTIFY |
161 | |
162 | /* Support for SSC marks, which are used by SDE |
163 | http://software.intel.com/en-us/articles/intel-software-development-emulator |
164 | to mark points in instruction traces that represent spin-loops and are |
165 | therefore uninteresting when collecting traces for architecture simulation. |
166 | */ |
167 | #ifndef INCLUDE_SSC_MARKS |
168 | #define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64) |
169 | #endif |
170 | |
171 | /* Linux 64 only for now */ |
172 | #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64) |
173 | // Portable (at least for gcc and icc) code to insert the necessary instructions |
174 | // to set %ebx and execute the unlikely no-op. |
175 | #if defined(__INTEL_COMPILER) |
176 | #define INSERT_SSC_MARK(tag) __SSC_MARK(tag) |
177 | #else |
178 | #define INSERT_SSC_MARK(tag) \ |
179 | __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag) \ |
180 | : "%ebx") |
181 | #endif |
182 | #else |
183 | #define INSERT_SSC_MARK(tag) ((void)0) |
184 | #endif |
185 | |
/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags, but there
   is no central issuing authority; rather, randomness is expected to work. */
190 | #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376) |
191 | #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377) |
192 | |
193 | // Markers for architecture simulation. |
194 | // FORKING : Before the primary thread forks. |
195 | // JOINING : At the start of the join. |
196 | // INVOKING : Before the threads invoke microtasks. |
// DISPATCH_INIT: At the start of a dynamically scheduled loop.
// DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled
//                loop.
199 | #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693) |
200 | #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694) |
201 | #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695) |
202 | #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696) |
203 | #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697) |
204 | |
// The object is an address that associates a specific set of prepare,
// acquire, release, and cancel operations.
207 | |
/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the
   thread begins checking for the existence of the release event. */
211 | |
/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */
214 | |
215 | /* Sync acquired indicates a thread has received a release event from another |
216 | thread and has stopped waiting. This operation must occur only after the |
217 | release event is received. */ |
218 | |
/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   happen just before the release event. */
222 | |
223 | #define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj)) |
224 | #define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj)) |
225 | #define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj)) |
226 | #define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj)) |
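
/* Illustrative ordering (a sketch, not code from the runtime; "flag" is a
   hypothetical shared variable visible to both threads; for spin waits use
   the KMP_FSYNC_SPIN_* variants below instead):

     // Waiting thread:
     KMP_FSYNC_PREPARE(&flag);   // about to start waiting on &flag
     wait_until_nonzero(&flag);  // hypothetical blocking wait
     KMP_FSYNC_ACQUIRED(&flag);  // the release event has been received

     // Releasing thread:
     KMP_FSYNC_RELEASING(&flag); // just before making the release visible
     flag = 1;
*/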
227 | |
/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
   with a delay (and not called at all if waiting time is small). So, in spin
   loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
   the spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for an example. */
233 | |
234 | #undef KMP_FSYNC_SPIN_INIT |
235 | #define KMP_FSYNC_SPIN_INIT(obj, spin) \ |
236 | int sync_iters = 0; \ |
237 | if (__itt_fsync_prepare_ptr) { \ |
238 | if (obj == NULL) { \ |
239 | obj = spin; \ |
240 | } /* if */ \ |
241 | } /* if */ \ |
242 | SSC_MARK_SPIN_START() |
243 | |
244 | #undef KMP_FSYNC_SPIN_PREPARE |
245 | #define KMP_FSYNC_SPIN_PREPARE(obj) \ |
246 | do { \ |
247 | if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) { \ |
248 | ++sync_iters; \ |
249 | if (sync_iters >= __kmp_itt_prepare_delay) { \ |
250 | KMP_FSYNC_PREPARE((void *)obj); \ |
251 | } /* if */ \ |
252 | } /* if */ \ |
253 | } while (0) |
254 | #undef KMP_FSYNC_SPIN_ACQUIRED |
255 | #define KMP_FSYNC_SPIN_ACQUIRED(obj) \ |
256 | do { \ |
257 | SSC_MARK_SPIN_END(); \ |
258 | if (sync_iters >= __kmp_itt_prepare_delay) { \ |
259 | KMP_FSYNC_ACQUIRED((void *)obj); \ |
260 | } /* if */ \ |
261 | } while (0) |
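
// Usage sketch (illustrative; modeled on the pattern in KMP_WAIT(), with
// hypothetical names "flag" and "flag_is_set"):
//   void *itt_sync_obj = NULL;
//   KMP_FSYNC_SPIN_INIT(itt_sync_obj, (void *)flag); // declares sync_iters
//   while (!flag_is_set(flag)) {
//     KMP_FSYNC_SPIN_PREPARE(itt_sync_obj); // delayed KMP_FSYNC_PREPARE()
//     // ... pause / yield ...
//   }
//   KMP_FSYNC_SPIN_ACQUIRED(itt_sync_obj); // reported only if the delay
//                                          // threshold was reached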
262 | |
/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
   KMP_ITT_IGNORE(
     ptr = malloc(size);
   );
*/
268 | #define KMP_ITT_IGNORE(statement) \ |
269 | do { \ |
270 | __itt_state_t __itt_state_; \ |
271 | if (__itt_state_get_ptr) { \ |
272 | __itt_state_ = __itt_state_get(); \ |
273 | __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set); \ |
274 | } /* if */ \ |
275 | { statement } \ |
276 | if (__itt_state_get_ptr) { \ |
277 | __itt_state_set(__itt_state_); \ |
278 | } /* if */ \ |
279 | } while (0) |
280 | |
281 | // Maximum number of frame domains to use (maps to |
282 | // different OpenMP regions in the user source code). |
283 | const int KMP_MAX_FRAME_DOMAINS = 997; |
284 | typedef struct kmp_itthash_entry { |
285 | ident_t *loc; |
286 | int team_size; |
287 | __itt_domain *d; |
288 | struct kmp_itthash_entry *next_in_bucket; |
289 | } kmp_itthash_entry_t; |
290 | typedef struct kmp_itthash { |
291 | kmp_itthash_entry_t *buckets[KMP_MAX_FRAME_DOMAINS]; |
int count; // just a heuristic to limit the number of entries
293 | } kmp_itthash_t; |
294 | extern kmp_itthash_t __kmp_itt_region_domains; |
295 | extern kmp_itthash_t __kmp_itt_barrier_domains; |
296 | extern __itt_domain *metadata_domain; |
297 | extern __itt_string_handle *string_handle_imbl; |
298 | extern __itt_string_handle *string_handle_loop; |
299 | extern __itt_string_handle *string_handle_sngl; |
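
// Lookup sketch (illustrative only; the real hash computation lives in
// kmp_itt.inl and may differ; "hash_of" is a hypothetical helper):
//   int bucket = hash_of(loc, team_size) % KMP_MAX_FRAME_DOMAINS;
//   kmp_itthash_entry_t *e = __kmp_itt_region_domains.buckets[bucket];
//   while (e != NULL && !(e->loc == loc && e->team_size == team_size))
//     e = e->next_in_bucket;
//   // e now points to the cached __itt_domain entry, or is NULL.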
300 | |
301 | #else |
302 | |
303 | // Null definitions of the synchronization tracing functions. |
304 | #define KMP_FSYNC_PREPARE(obj) ((void)0) |
305 | #define KMP_FSYNC_CANCEL(obj) ((void)0) |
306 | #define KMP_FSYNC_ACQUIRED(obj) ((void)0) |
307 | #define KMP_FSYNC_RELEASING(obj) ((void)0) |
308 | |
309 | #define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0) |
310 | #define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0) |
311 | #define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0) |
312 | |
313 | #define KMP_ITT_IGNORE(stmt) \ |
314 | do { \ |
315 | stmt \ |
316 | } while (0) |
317 | |
318 | #endif // USE_ITT_NOTIFY |
319 | |
320 | #if !KMP_DEBUG |
321 | // In release mode include definitions of inline functions. |
322 | #include "kmp_itt.inl" |
323 | #endif |
324 | |
325 | #endif // KMP_ITT_H |
326 | |
327 | #else /* USE_ITT_BUILD */ |
328 | |
329 | // Null definitions of the synchronization tracing functions. |
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
331 | // By defining these we avoid unpleasant ifdef tests in many places. |
332 | #define KMP_FSYNC_PREPARE(obj) ((void)0) |
333 | #define KMP_FSYNC_CANCEL(obj) ((void)0) |
334 | #define KMP_FSYNC_ACQUIRED(obj) ((void)0) |
335 | #define KMP_FSYNC_RELEASING(obj) ((void)0) |
336 | |
337 | #define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0) |
338 | #define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0) |
339 | #define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0) |
340 | |
341 | #define KMP_ITT_IGNORE(stmt) \ |
342 | do { \ |
343 | stmt \ |
344 | } while (0) |
345 | |
346 | #define USE_ITT_BUILD_ARG(x) |
347 | |
348 | #endif /* USE_ITT_BUILD */ |
349 | |