#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
#define __kmp_inline // Turn off inlining in debug mode.
#else
#define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
extern kmp_int32 __kmp_itt_prepare_delay;
#ifdef __cplusplus
extern "C" void __kmp_itt_fini_ittlib(void);
#else
extern void __kmp_itt_fini_ittlib(void);
#endif
#endif

// Simplify the handling of an argument that is only required when
// USE_ITT_BUILD is enabled.
#define USE_ITT_BUILD_ARG(x) , x
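// For example (a hypothetical declaration, not a real runtime entry point), a
// wait routine that needs an extra ITT sync object only in ITT builds can be
// written as
//   void __kmp_wait_example(volatile kmp_uint32 *spinner, kmp_uint32 checker
//                           USE_ITT_BUILD_ARG(void *itt_sync_obj));
// which expands to ", void *itt_sync_obj" here and to nothing when
// USE_ITT_BUILD is off (see the matching empty definition at the end of this
// file).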

void __kmp_itt_initialize();
void __kmp_itt_destroy();
void __kmp_itt_reset();

// -----------------------------------------------------------------------------
// New stuff for reporting high-level constructs.

// Note the naming convention:
//   a __kmp_itt_xxxing() function should be called before the action, while
//   a __kmp_itt_xxxed() function should be called after the action.

// --- Parallel region reporting ---
__kmp_inline void
__kmp_itt_region_forking(int gtid, int team_size,
                         int barriers); // Master only, before forking threads.
__kmp_inline void
__kmp_itt_region_joined(int gtid); // Master only, after joining threads.
// (*) Note: A thread may execute tasks after this point, though.

// --- Frame reporting ---
// region=0: no regions, region=1: parallel, region=2: serialized parallel
__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                         __itt_timestamp end, int imbalance,
                                         ident_t *loc, int team_size,
                                         int region = 0);
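// A minimal call sketch (argument values are illustrative only): submitting a
// frame that covers a parallel region executed by the whole team could look
// like
//   __kmp_itt_frame_submit(gtid, begin_ts, end_ts, /*imbalance=*/0, loc,
//                          team_size, /*region=*/1);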

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                               kmp_uint64 end,
                                               kmp_uint64 imbalance,
                                               kmp_uint64 reduction);
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                          kmp_uint64 iterations,
                                          kmp_uint64 chunk);
__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);
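// For example (illustrative values), a dynamically scheduled loop with a trip
// count of 1000 and a chunk size of 4 would be reported as
//   __kmp_itt_metadata_loop(loc, 1 /* dynamic */, 1000, 4);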

// --- Barrier reporting ---
__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
                                            int delta = 0);
__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);
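/* A sketch of the intended reporting order around a single barrier (the real
   call sites live in the barrier code; bt is a barrier type such as
   bs_plain_barrier):
     void *obj = __kmp_itt_barrier_object(gtid, bs_plain_barrier);
     __kmp_itt_barrier_starting(gtid, obj); // about to enter the barrier
     __kmp_itt_barrier_middle(gtid, obj);   // gathered, waiting to be released
     __kmp_itt_barrier_finished(gtid, obj); // released, leaving the barrier
*/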

// --- Taskwait reporting ---
__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);
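/* The taskwait sequence is analogous (sketch):
     void *obj = __kmp_itt_taskwait_object(gtid);
     __kmp_itt_taskwait_starting(gtid, obj);
     // ... wait for child tasks to complete ...
     __kmp_itt_taskwait_finished(gtid, obj);
*/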

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting(void *object);
__kmp_inline void __kmp_itt_task_finished(void *object);

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
                                          const ident_t *);
#else
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);
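// Expected event order over a lock's lifetime (a sketch, inferred from the
// names above): creating, then one or more acquiring / acquired / releasing
// cycles, then destroyed; cancelled is reported instead of acquired when an
// acquisition attempt is abandoned (e.g. an unsuccessful test-lock).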

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
                                              const ident_t *);
#else
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start(int gtid);
__kmp_inline void __kmp_itt_single_end(int gtid);

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init(int gtid);
__kmp_inline void __kmp_itt_ordered_prep(int gtid);
__kmp_inline void __kmp_itt_ordered_start(int gtid);
__kmp_inline void __kmp_itt_ordered_end(int gtid);

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name(int gtid);

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created(void *object,
                                                  char const *name);

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
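/* A usage sketch of stack stitching (an assumed pattern; the real call sites
   are in the fork/join path of the runtime):
     __itt_caller id = __kmp_itt_stack_caller_create(); // master, before fork
     __kmp_itt_stack_callee_enter(id);   // worker, before invoking the microtask
     // ... microtask runs ...
     __kmp_itt_stack_callee_leave(id);   // worker, after the microtask returns
     __kmp_itt_stack_caller_destroy(id); // master, after join
*/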

// -----------------------------------------------------------------------------
// Old stuff for reporting low-level internal synchronization.

#if USE_ITT_NOTIFY

/* Support for SSC marks, which are used by SDE
   http://software.intel.com/en-us/articles/intel-software-development-emulator
   to mark points in instruction traces that represent spin-loops and are
   therefore uninteresting when collecting traces for architecture simulation.
 */
#ifndef INCLUDE_SSC_MARKS
#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary instructions
// to set %ebx and execute the unlikely no-op.
#if defined(__INTEL_COMPILER)
#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
#define INSERT_SSC_MARK(tag)                                                   \
  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)    \
                       : "%ebx")
#endif
#else
#define INSERT_SSC_MARK(tag) ((void)0)
#endif

/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags, but there
   is no central issuing authority; rather, randomness is expected to work. */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)

// Markers for architecture simulation.
// FORKING      : Before the master thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of a dynamically scheduled loop.
// DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled
//                loop.
#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

// The object is an address that associates a specific set of the prepare,
// acquire, release, and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the
   thread begins checking for the existence of the release event. */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the
   release event is received. */

/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   happen just before the release event. */

#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))

/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
   with a delay (and not called at all if the waiting time is small). So, in
   spin loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT()
   (before the spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for an example. */
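
/* A minimal usage sketch of the spin variants (an assumed caller-side pattern,
   not the actual KMP_WAIT_YIELD() implementation; flag_is_set() is a
   placeholder, not a runtime API):
     void *itt_sync_obj = NULL;
     KMP_FSYNC_SPIN_INIT(itt_sync_obj, (void *)&flag);
     while (!flag_is_set(&flag)) {
       KMP_FSYNC_SPIN_PREPARE(itt_sync_obj);
       // yield / pause here
     }
     KMP_FSYNC_SPIN_ACQUIRED(itt_sync_obj);
*/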

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
  int sync_iters = 0;                                                          \
  if (__itt_fsync_prepare_ptr) {                                               \
    if (obj == NULL) {                                                         \
      obj = spin;                                                              \
    } /* if */                                                                 \
  } /* if */                                                                   \
  SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
  do {                                                                         \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
      ++sync_iters;                                                            \
      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
        KMP_FSYNC_PREPARE((void *)obj);                                        \
      } /* if */                                                               \
    } /* if */                                                                 \
  } while (0)
#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
  do {                                                                         \
    SSC_MARK_SPIN_END();                                                       \
    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
      KMP_FSYNC_ACQUIRED((void *)obj);                                         \
    } /* if */                                                                 \
  } while (0)

/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
     KMP_ITT_IGNORE(
       ptr = malloc(size);
     );
*/
#define KMP_ITT_IGNORE(statement)                                              \
  do {                                                                         \
    __itt_state_t __itt_state_;                                                \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_ = __itt_state_get();                                        \
      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);          \
    } /* if */                                                                 \
    { statement }                                                              \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_set(__itt_state_);                                           \
    } /* if */                                                                 \
  } while (0)

// Maximum number of frame domains to use (maps to different OpenMP regions in
// the user source code).
const int KMP_MAX_FRAME_DOMAINS = 512;
extern kmp_int32 __kmp_barrier_domain_count;
extern kmp_int32 __kmp_region_domain_count;
extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *metadata_domain;
extern __itt_string_handle *string_handle_imbl;
extern __itt_string_handle *string_handle_loop;
extern __itt_string_handle *string_handle_sngl;

#else

// Null definitions of the synchronization tracing functions.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#endif // USE_ITT_NOTIFY

#if !KMP_DEBUG
// In release mode include definitions of inline functions.
#include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */