#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
    #define __kmp_inline           // Turn off inlining in debug mode.
#else
    #define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
    extern kmp_int32 __kmp_itt_prepare_delay;
# ifdef __cplusplus
    extern "C" void __kmp_itt_fini_ittlib(void);
# else
    extern void __kmp_itt_fini_ittlib(void);
# endif
#endif

// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled.
#define USE_ITT_BUILD_ARG(x) ,x

void __kmp_itt_initialize();
void __kmp_itt_destroy();

// -------------------------------------------------------------------------------------------------
// New stuff for reporting high-level constructs.
// -------------------------------------------------------------------------------------------------

// Note the naming convention:
//     __kmp_itt_xxxing() function should be called before action, while
//     __kmp_itt_xxxed()  function should be called after action.

// --- Parallel region reporting ---
__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers ); // Master only, before forking threads.
__kmp_inline void __kmp_itt_region_joined( int gtid ); // Master only, after joining threads.
    // (*) Note: A thread may execute tasks after this point, though.

// --- Frame reporting ---
// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 );

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
__kmp_inline void __kmp_itt_metadata_single( ident_t * loc );

// --- Barrier reporting ---
__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );

// --- Taskwait reporting ---
__kmp_inline void * __kmp_itt_taskwait_object( int gtid );
__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting( void * object );
__kmp_inline void __kmp_itt_task_finished( void * object );

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start( int gtid );
__kmp_inline void __kmp_itt_single_end( int gtid );

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init( int gtid );
__kmp_inline void __kmp_itt_ordered_prep( int gtid );
__kmp_inline void __kmp_itt_ordered_start( int gtid );
__kmp_inline void __kmp_itt_ordered_end( int gtid );

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name( int gtid );

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);

// -------------------------------------------------------------------------------------------------
// Old stuff for reporting low-level internal synchronization.
// -------------------------------------------------------------------------------------------------

#if USE_ITT_NOTIFY

    /*
     * Support for SSC marks, which are used by SDE
     * http://software.intel.com/en-us/articles/intel-software-development-emulator
     * to mark points in instruction traces that represent spin-loops and are
     * therefore uninteresting when collecting traces for architecture simulation.
     */
    #ifndef INCLUDE_SSC_MARKS
    # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
    #endif

    /* Linux 64 only for now */
    #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
    // Portable (at least for gcc and icc) code to insert the necessary instructions
    // to set %ebx and execute the unlikely no-op.
    #if defined( __INTEL_COMPILER )
    # define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
    #else
    # define INSERT_SSC_MARK(tag) \
        __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
    #endif
    #else
    # define INSERT_SSC_MARK(tag) ((void)0)
    #endif

    /* Markers for the start and end of regions that represent polling and
     * are therefore uninteresting to architectural simulations 0x4376 and
     * 0x4377 are arbitrary numbers that should be unique in the space of
     * SSC tags, but there is no central issuing authority rather
     * randomness is expected to work.
     */
    #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
    #define SSC_MARK_SPIN_END()   INSERT_SSC_MARK(0x4377)

    // Markers for architecture simulation.
    // FORKING      : Before the master thread forks.
    // JOINING      : At the start of the join.
    // INVOKING     : Before the threads invoke microtasks.
    // DISPATCH_INIT: At the start of dynamically scheduled loop.
    // DISPATCH_NEXT: After claiming next iteration of dynamically scheduled loop.
    #define SSC_MARK_FORKING()       INSERT_SSC_MARK(0xd693)
    #define SSC_MARK_JOINING()       INSERT_SSC_MARK(0xd694)
    #define SSC_MARK_INVOKING()      INSERT_SSC_MARK(0xd695)
    #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
    #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

    // The object is an address that associates a specific set of the prepare, acquire, release,
    // and cancel operations.

    /* Sync prepare indicates a thread is going to start waiting for another thread
       to send a release event.  This operation should be done just before the thread
       begins checking for the existence of the release event */

    /* Sync cancel indicates a thread is cancelling a wait on another thread and
       continuing execution without waiting for the other thread to release it */

    /* Sync acquired indicates a thread has received a release event from another
       thread and has stopped waiting.  This operation must occur only after the release
       event is received. */

    /* Sync release indicates a thread is going to send a release event to another thread
       so it will stop waiting and continue execution. This operation must just happen before
       the release event. */

    #define KMP_FSYNC_PREPARE(   obj ) __itt_fsync_prepare(   (void *)( obj ) )
    #define KMP_FSYNC_CANCEL(    obj ) __itt_fsync_cancel(    (void *)( obj ) )
    #define KMP_FSYNC_ACQUIRED(  obj ) __itt_fsync_acquired(  (void *)( obj ) )
    #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )

    /*
        In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay
        (and not called at all if waiting time is small). So, in spin loops, do not use
        KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before spin loop),
        KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED().
        See KMP_WAIT_YIELD() for example.
    */

    #undef KMP_FSYNC_SPIN_INIT
    #define KMP_FSYNC_SPIN_INIT( obj, spin ) \
        int sync_iters = 0; \
        if ( __itt_fsync_prepare_ptr ) { \
            if ( obj == NULL ) { \
                obj = spin; \
            } /* if */ \
        } /* if */ \
        SSC_MARK_SPIN_START()

    #undef KMP_FSYNC_SPIN_PREPARE
    #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \
        if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
            ++ sync_iters; \
            if ( sync_iters >= __kmp_itt_prepare_delay ) { \
                KMP_FSYNC_PREPARE( (void*) obj ); \
            } /* if */ \
        } /* if */ \
    } while (0)
    #undef KMP_FSYNC_SPIN_ACQUIRED
    #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \
        SSC_MARK_SPIN_END(); \
        if ( sync_iters >= __kmp_itt_prepare_delay ) { \
            KMP_FSYNC_ACQUIRED( (void*) obj ); \
        } /* if */ \
    } while (0)

    /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
           KMP_ITT_IGNORE(
               ptr = malloc( size );
           );
    */
    #define KMP_ITT_IGNORE( statement ) do { \
        __itt_state_t __itt_state_; \
        if ( __itt_state_get_ptr ) { \
            __itt_state_ = __itt_state_get(); \
            __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
        } /* if */ \
        { statement } \
        if ( __itt_state_get_ptr ) { \
            __itt_state_set( __itt_state_ ); \
        } /* if */ \
    } while (0)

    const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to
                                           // different OpenMP regions in the user source code).
    extern kmp_int32 __kmp_barrier_domain_count;
    extern kmp_int32 __kmp_region_domain_count;
    extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
    extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
    extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
    extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
    extern __itt_domain * metadata_domain;
    extern __itt_string_handle * string_handle_imbl;
    extern __itt_string_handle * string_handle_loop;
    extern __itt_string_handle * string_handle_sngl;

#else

// Null definitions of the synchronization tracing functions.
# define KMP_FSYNC_PREPARE(   obj ) ((void)0)
# define KMP_FSYNC_CANCEL(    obj ) ((void)0)
# define KMP_FSYNC_ACQUIRED(  obj ) ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)

# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)

#endif // USE_ITT_NOTIFY

#if ! KMP_DEBUG
    // In release mode include definitions of inline functions.
    #include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else  /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
# define KMP_FSYNC_PREPARE(   obj ) ((void)0)
# define KMP_FSYNC_CANCEL(    obj ) ((void)0)
# define KMP_FSYNC_ACQUIRED(  obj ) ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)

# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)

# define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */