Tidy statistics collection
This removes some statistics counters and timers which were not used,
adds new counters and timers for some language features that were not
monitored previously and separates the counters and timers into those
which are of interest for investigating user code and those which are
only of interest to the developer of the runtime itself.
The runtime developer statistics are now only collected if the
additional #define KMP_DEVELOPER_STATS is set.
Additional user statistics which are now collected include:
* Count of nested parallelism (omp parallel inside a parallel region)
* Count of omp distribute occurrences
* Count of omp teams occurrences
* Counts of task-related statistics (taskyield, task execution, task
cancellation, task steal)
* Values passed to omp_set_num_threads
* Time spent in omp single and omp master
None of this affects code compiled without stats gathering enabled,
which is the normal library build mode.
This also fixes the CMake build by linking to the standard C++ library
when building the stats library, which requires it. The normal library
does not have this requirement and its link phase is left alone.
Differential Revision: http://reviews.llvm.org/D11759
llvm-svn: 244677
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index e6c4e8a..0d9c766 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -46,7 +46,7 @@
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_linear_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -123,7 +123,7 @@
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_linear_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_team_t *team;
@@ -141,17 +141,18 @@
if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (propagate_icvs) {
- ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
- for (i=1; i<nproc; ++i) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
- ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
+ for (i=1; i<nproc; ++i) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
+ ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
+ ngo_sync();
}
- ngo_sync();
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PUSH
// Now, release all of the worker threads
@@ -217,7 +218,7 @@
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_tree_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -312,7 +313,7 @@
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_tree_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -381,14 +382,15 @@
#endif /* KMP_CACHE_MANAGE */
#if KMP_BARRIER_ICV_PUSH
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (propagate_icvs) {
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
- team, child_tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
- &team->t.t_implicit_task_taskdata[0].td_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (propagate_icvs) {
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
+ team, child_tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
+ &team->t.t_implicit_task_taskdata[0].td_icvs);
+ }
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PUSH
KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
"go(%p): %u => %u\n", gtid, team->t.t_id, tid,
@@ -414,7 +416,7 @@
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hyper_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
@@ -520,7 +522,7 @@
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hyper_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
register kmp_info_t **other_threads;
@@ -725,7 +727,7 @@
int gtid, int tid, void (*reduce) (void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hier_gather);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc = this_thr->th.th_team_nproc;
@@ -853,7 +855,7 @@
int propagate_icvs
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
- KMP_TIME_BLOCK(KMP_hier_release);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
@@ -1035,7 +1037,7 @@
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void *reduce_data, void (*reduce)(void *, void *))
{
- KMP_TIME_BLOCK(KMP_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
@@ -1294,7 +1296,7 @@
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
- KMP_TIME_BLOCK(KMP_end_split_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
int tid = __kmp_tid_from_gtid(gtid);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;
@@ -1335,7 +1337,7 @@
void
__kmp_join_barrier(int gtid)
{
- KMP_TIME_BLOCK(KMP_join_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
register kmp_uint nproc;
@@ -1533,7 +1535,7 @@
void
__kmp_fork_barrier(int gtid, int tid)
{
- KMP_TIME_BLOCK(KMP_fork_barrier);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
@@ -1648,15 +1650,16 @@
this data before this function is called. We cannot modify __kmp_fork_call() to look at
the fixed ICVs in the master's thread struct, because it is not always the case that the
threads arrays have been allocated when __kmp_fork_call() is executed. */
- KMP_START_EXPLICIT_TIMER(USER_icv_copy);
- if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
- // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
- KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
- __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
- copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
- &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+ {
+ KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
+ if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
+ // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
+ KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+ &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
+ }
}
- KMP_STOP_EXPLICIT_TIMER(USER_icv_copy);
#endif // KMP_BARRIER_ICV_PULL
if (__kmp_tasking_mode != tskm_immediate_exec) {
@@ -1702,7 +1705,7 @@
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
{
- KMP_TIME_BLOCK(KMP_setup_icv_copy);
+ KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);