#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
    #define __kmp_inline                 // Turn off inlining in debug mode.
#else
    #define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
    extern kmp_int32 __kmp_itt_prepare_delay;
# ifdef __cplusplus
    extern "C" void __kmp_itt_fini_ittlib(void);
# else
    extern void __kmp_itt_fini_ittlib(void);
# endif
#endif

// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled.
#define USE_ITT_BUILD_ARG(x) ,x
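// For example, a hypothetical internal helper declared as
//     void __kmp_do_work( int gtid USE_ITT_BUILD_ARG( void * itt_sync_obj ) );
// expands to "void __kmp_do_work( int gtid ,void * itt_sync_obj );" when
// USE_ITT_BUILD is enabled; when it is disabled, the empty definition of
// USE_ITT_BUILD_ARG() at the end of this file drops the extra parameter.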

void __kmp_itt_initialize();
void __kmp_itt_destroy();

// -------------------------------------------------------------------------------------------------
// New interface for reporting high-level constructs.
// -------------------------------------------------------------------------------------------------

// Note the naming convention:
//     An __kmp_itt_xxxing() function should be called before the action, while
//     the corresponding __kmp_itt_xxxed() function should be called after it.

// --- Parallel region reporting ---
__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers ); // Master only, before forking threads.
__kmp_inline void __kmp_itt_region_joined( int gtid );                               // Master only, after joining threads.
                                                       // (*) Note: A thread may execute tasks after this point, though.
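// A hypothetical call sequence on the master thread (illustration only; the
// real call sites are in the runtime's fork/join path):
//     __kmp_itt_region_forking( gtid, team_size, barriers ); // just before the team is forked
//     /* ... parallel region executes ... */
//     __kmp_itt_region_joined( gtid );                       // just after the team has joined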

// --- Frame reporting ---
// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 );

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
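// For example, a hypothetical call reporting a dynamically scheduled loop of
// 1000 iterations with chunk size 4 (using the encoding described above):
//     __kmp_itt_metadata_loop( loc, 1 /* dynamic */, 1000 /* iterations */, 4 /* chunk */ );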
__kmp_inline void __kmp_itt_metadata_single( ident_t * loc );

// --- Barrier reporting ---
__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );

// --- Taskwait reporting ---
__kmp_inline void * __kmp_itt_taskwait_object( int gtid );
__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting( void * object );
__kmp_inline void __kmp_itt_task_finished( void * object );

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start( int gtid );
__kmp_inline void __kmp_itt_single_end( int gtid );

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init( int gtid );
__kmp_inline void __kmp_itt_ordered_prep( int gtid );
__kmp_inline void __kmp_itt_ordered_start( int gtid );
__kmp_inline void __kmp_itt_ordered_end( int gtid );

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name( int gtid );

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);

// -------------------------------------------------------------------------------------------------
// Old interface for reporting low-level internal synchronization.
// -------------------------------------------------------------------------------------------------

#if USE_ITT_NOTIFY

/*
 * Support for SSC marks, which are used by SDE
 * http://software.intel.com/en-us/articles/intel-software-development-emulator
 * to mark points in instruction traces that represent spin-loops and are
 * therefore uninteresting when collecting traces for architecture simulation.
 */
#ifndef INCLUDE_SSC_MARKS
# define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary instructions
// to set %ebx and execute the unlikely no-op.
#if defined( __INTEL_COMPILER )
# define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
# define INSERT_SSC_MARK(tag) \
          __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
#endif
#else
# define INSERT_SSC_MARK(tag) ((void)0)
#endif

/* Markers for the start and end of regions that represent polling and
 * are therefore uninteresting to architectural simulations. 0x4376 and
 * 0x4377 are arbitrary numbers that should be unique in the space of
 * SSC tags, but there is no central issuing authority; rather,
 * randomness is expected to work.
 */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END()   INSERT_SSC_MARK(0x4377)

// Markers for architecture simulation.
// FORKING      : Before the master thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of a dynamically scheduled loop.
// DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled loop.
#define SSC_MARK_FORKING()       INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING()       INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING()      INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

// The object is an address that associates a specific set of prepare, acquire, release,
// and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the thread
   begins checking for the existence of the release event. */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the release
   event is received. */

/* Sync release indicates a thread is going to send a release event to another thread
   so it will stop waiting and continue execution. This operation must happen just before
   the release event. */

#define KMP_FSYNC_PREPARE( obj )   __itt_fsync_prepare( (void *)( obj ) )
#define KMP_FSYNC_CANCEL( obj )    __itt_fsync_cancel( (void *)( obj ) )
#define KMP_FSYNC_ACQUIRED( obj )  __itt_fsync_acquired( (void *)( obj ) )
#define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )
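// A minimal sketch of how these calls pair up between a waiting thread and a
// releasing thread (hypothetical names, illustration only; for real spin loops
// the KMP_FSYNC_SPIN_* variants below should be used instead):
//
//     // Waiting thread:
//     KMP_FSYNC_PREPARE( &flag );     // just before it starts checking for the release event
//     while ( flag == 0 ) { ; }       // wait for the release event
//     KMP_FSYNC_ACQUIRED( &flag );    // release event received, waiting is over
//
//     // Releasing thread:
//     KMP_FSYNC_RELEASING( &flag );   // just before producing the release event
//     flag = 1;                       // the release event itself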

/*
    In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay
    (and not called at all if the waiting time is small). So, in spin loops, do not use
    KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before the spin loop),
    KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED().
    See KMP_WAIT_YIELD() for an example, and the illustrative sketch after the macro
    definitions below.
*/

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT( obj, spin ) \
    int sync_iters = 0; \
    if ( __itt_fsync_prepare_ptr ) { \
        if ( obj == NULL ) { \
            obj = spin; \
        } /* if */ \
    } /* if */ \
    SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE( obj ) do { \
        if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
            ++ sync_iters; \
            if ( sync_iters >= __kmp_itt_prepare_delay ) { \
                KMP_FSYNC_PREPARE( (void*) obj ); \
            } /* if */ \
        } /* if */ \
    } while (0)
#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \
        SSC_MARK_SPIN_END(); \
        if ( sync_iters >= __kmp_itt_prepare_delay ) { \
            KMP_FSYNC_ACQUIRED( (void*) obj ); \
        } /* if */ \
    } while (0)
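// A minimal sketch of a spin-wait loop instrumented with the macros above
// (hypothetical function and variable names, illustration only; the real usage
// is in KMP_WAIT_YIELD()):
//
//     void example_spin_wait( volatile kmp_uint32 * spin, kmp_uint32 checker ) {
//         void * obj = NULL;                 // ITT sync object, set lazily by SPIN_INIT
//         KMP_FSYNC_SPIN_INIT( obj, (void *) spin );
//         while ( *spin != checker ) {
//             KMP_FSYNC_SPIN_PREPARE( obj ); // reports "prepare" once the delay threshold is reached
//             /* yield / pause here */
//         }
//         KMP_FSYNC_SPIN_ACQUIRED( obj );    // reports "acquired" only if "prepare" was reported
//     }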

/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
       KMP_ITT_IGNORE(
           ptr = malloc( size );
       );
*/
#define KMP_ITT_IGNORE( statement ) do { \
        __itt_state_t __itt_state_; \
        if ( __itt_state_get_ptr ) { \
            __itt_state_ = __itt_state_get(); \
            __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
        } /* if */ \
        { statement } \
        if ( __itt_state_get_ptr ) { \
            __itt_state_set( __itt_state_ ); \
        } /* if */ \
    } while (0)

const int KMP_MAX_FRAME_DOMAINS = 512;  // Maximum number of frame domains to use (maps to
                                        // different OpenMP regions in the user source code).
extern kmp_int32 __kmp_barrier_domain_count;
extern kmp_int32 __kmp_region_domain_count;
extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain * metadata_domain;
extern __itt_string_handle * string_handle_imbl;
extern __itt_string_handle * string_handle_loop;
extern __itt_string_handle * string_handle_sngl;

#else

// Null definitions of the synchronization tracing functions.
# define KMP_FSYNC_PREPARE( obj )   ((void)0)
# define KMP_FSYNC_CANCEL( obj )    ((void)0)
# define KMP_FSYNC_ACQUIRED( obj )  ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj )    ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj )   ((void)0)

# define KMP_ITT_IGNORE( stmt ) do { stmt } while (0)

#endif // USE_ITT_NOTIFY

#if ! KMP_DEBUG
// In release mode include definitions of inline functions.
#include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
# define KMP_FSYNC_PREPARE( obj )   ((void)0)
# define KMP_FSYNC_CANCEL( obj )    ((void)0)
# define KMP_FSYNC_ACQUIRED( obj )  ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj )    ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj )   ((void)0)

# define KMP_ITT_IGNORE( stmt ) do { stmt } while (0)

# define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */