Update the OMPT implementation to the OpenMP 5.0 Preview 1 (TR4) specification.
The code is tested to work with the latest Clang, GNU, and Intel compilers. The
implementation is optimized for low overhead when no tool is attached, shifting
the cost to executions where a tool is attached.
This patch does not implement OMPT for libomptarget.
Patch by Simon Convent and Joachim Protze
Differential Revision: https://reviews.llvm.org/D38185
llvm-svn: 317085
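
For context, a TR4 tool attaches by defining ompt_start_tool and registering
callbacks through the lookup entry point passed to its initializer. A minimal
sketch in C (ompt_fns_t and the initialize signature follow the TR4 draft as
shipped in this runtime's ompt.h; later drafts renamed these, so treat the
exact spellings as assumptions):

    #include <stdio.h>
    #include <ompt.h> /* assumption: the TR4-era header installed with the runtime */

    static ompt_set_callback_t ompt_set_callback;

    static void on_parallel_begin(ompt_data_t *parent_task_data,
                                  const ompt_frame_t *parent_frame,
                                  ompt_data_t *parallel_data,
                                  unsigned int requested_team_size,
                                  ompt_invoker_t invoker,
                                  const void *codeptr_ra) {
      parallel_data->value = 42; /* tool-owned; handed back at parallel_end */
      printf("parallel_begin: %u threads, codeptr %p\n", requested_team_size,
             codeptr_ra);
    }

    /* TR4 form; assumption -- later drafts changed this signature */
    static int my_initialize(ompt_function_lookup_t lookup, ompt_fns_t *fns) {
      ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
      ompt_set_callback(ompt_callback_parallel_begin,
                        (ompt_callback_t)on_parallel_begin);
      return 1; /* nonzero keeps the tool active */
    }

    static void my_finalize(ompt_fns_t *fns) {}

    /* resolved by the runtime at startup; when no tool defines it, all
       ompt_enabled bits stay clear and each event costs only a bit test */
    ompt_fns_t *ompt_start_tool(unsigned int omp_version,
                                const char *runtime_version) {
      static ompt_fns_t fns = {&my_initialize, &my_finalize};
      return &fns;
    }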
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 3c40873..bd3b8fe 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -722,16 +722,6 @@
/* TODO replace with general release procedure */
team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
-#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
- /* accept blame for "ordered" waiting */
- kmp_info_t *this_thread = __kmp_threads[gtid];
- ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
- this_thread->th.ompt_thread_info.wait_id);
- }
-#endif
-
KMP_MB(); /* Flush all pending memory write invalidates. */
}
#endif /* BUILD_PARALLEL_ORDERED */
@@ -1204,6 +1194,28 @@
this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */
+#if OMPT_SUPPORT
+ ompt_data_t ompt_parallel_data;
+ ompt_parallel_data.ptr = NULL;
+ ompt_data_t *implicit_task_data;
+ void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+ if (ompt_enabled.enabled &&
+ this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+
+ ompt_task_info_t *parent_task_info;
+ parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
+
+ parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_enabled.ompt_callback_parallel_begin) {
+ int team_size = 1;
+
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+ &(parent_task_info->task_data), &(parent_task_info->frame),
+ &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
+ }
+ }
+#endif // OMPT_SUPPORT
+
if (this_thr->th.th_team != serial_team) {
// Nested level will be an index in the nested nthreads array
int level = this_thr->th.th_team->t.t_level;
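
The ompt_enabled.enabled test above (replacing the old global ompt_enabled
flag) is where the low-overhead design shows: the runtime keeps one bit for
"any tool attached" plus one bit per callback, so an event site with no tool
costs a single bit test instead of a callback-pointer load and compare. A
sketch of the shape (the real struct in ompt-internal.h is generated from the
event list; only fields used in this diff are named here):

    typedef struct ompt_callbacks_active_s {
      unsigned int enabled : 1; /* any tool attached? */
      unsigned int ompt_callback_parallel_begin : 1;
      unsigned int ompt_callback_parallel_end : 1;
      unsigned int ompt_callback_implicit_task : 1;
      /* ... one bit per event, generated from the FOREACH_OMPT_EVENT list */
    } ompt_callbacks_active_t;

    extern ompt_callbacks_active_t ompt_enabled;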
@@ -1215,13 +1227,9 @@
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
-#endif
-
new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
- ompt_parallel_id,
+ ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@@ -1316,11 +1324,6 @@
}
this_thr->th.th_dispatch = serial_team->t.t_dispatch;
-#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
- __ompt_team_assign_id(serial_team, ompt_parallel_id);
-#endif
-
KMP_MB();
} else {
@@ -1364,17 +1367,41 @@
if (__kmp_env_consistency_check)
__kmp_push_parallel(global_tid, NULL);
+#if OMPT_SUPPORT
+ serial_team->t.ompt_team_info.master_return_address = codeptr;
+ if (ompt_enabled.enabled &&
+ this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+ OMPT_CUR_TASK_INFO(this_thr)
+ ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+
+ ompt_lw_taskteam_t lw_taskteam;
+ __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
+ &ompt_parallel_data, codeptr);
+
+ __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
+    // don't use lw_taskteam after linking; its content was swapped
+
+ /* OMPT implicit task begin */
+ implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
+ OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
+ }
+
+ /* OMPT state */
+ this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+ OMPT_CUR_TASK_INFO(this_thr)
+ ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1);
+ }
+#endif
}
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
enum fork_context_e call_context, // Intel, GNU, ...
- kmp_int32 argc,
-#if OMPT_SUPPORT
- void *unwrapped_task,
-#endif
- microtask_t microtask, launch_t invoker,
+ kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
va_list *ap
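
The OMPT_GET_FRAME_ADDRESS calls above maintain the TR4 frame pair that lets
a tool splice runtime frames out of a sampled call stack. As used throughout
this patch (comments are an informal reading of the TR4 semantics):

    typedef struct ompt_frame_t {
      /* frame at which the runtime handed control to user code for this
         task; set around user-code execution, NULL otherwise */
      void *exit_runtime_frame;
      /* frame of the user code that re-entered the runtime, e.g. at a
         parallel construct; set before fork, cleared after join */
      void *reenter_runtime_frame;
    } ompt_frame_t;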
@@ -1432,16 +1459,17 @@
master_set_numthreads = master_th->th.th_set_nproc;
#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id;
- ompt_task_id_t ompt_task_id;
+ ompt_data_t ompt_parallel_data;
+ ompt_parallel_data.ptr = NULL;
+ ompt_data_t *parent_task_data;
ompt_frame_t *ompt_frame;
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
+ ompt_data_t *implicit_task_data;
+ void *return_address = NULL;
- if (ompt_enabled) {
- ompt_parallel_id = __ompt_parallel_id_new(gtid);
- ompt_task_id = __ompt_get_task_id_internal(0);
- ompt_frame = __ompt_get_task_frame_internal(0);
+ if (ompt_enabled.enabled) {
+ __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
+ NULL, NULL);
+ return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
}
#endif
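
This hunk is the pattern for the whole patch: the runtime-minted 64-bit
handles (ompt_parallel_id_t, ompt_task_id_t) give way to TR4's ompt_data_t,
a slot the tool owns and the runtime merely stores and passes back at each
event. Per the TR4 specification:

    typedef union ompt_data_t {
      uint64_t value; /* data under tool control */
      void *ptr;      /* pointer under tool control */
    } ompt_data_t;

    /* runtime-provided "empty" value, used as an initializer below
       (e.g. "ompt_data_none, // root parallel id") */
    extern ompt_data_t ompt_data_none;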
@@ -1465,13 +1493,16 @@
#endif
#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
- int team_size = master_set_numthreads;
-
- ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
- ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
- OMPT_INVOKER(call_context));
+ if (ompt_enabled.enabled) {
+ if (ompt_enabled.ompt_callback_parallel_begin) {
+ int team_size = master_set_numthreads
+ ? master_set_numthreads
+ : get__nproc_2(parent_team, master_tid);
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
+ parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
+ OMPT_INVOKER(call_context), return_address);
+ }
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
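
Note the two arguments gained here: team_size now falls back to
get__nproc_2() when the user set no thread count, and every TR4 callback
carries codeptr_ra, the runtime-entry return address captured by
OMPT_LOAD_RETURN_ADDRESS, so tools can attribute events to source. The
signature these call sites satisfy (per TR4):

    typedef void (*ompt_callback_parallel_begin_t)(
        ompt_data_t *parent_task_data,
        const ompt_frame_t *parent_frame,
        ompt_data_t *parallel_data,      /* tool fills; passed back later */
        unsigned int requested_team_size,
        ompt_invoker_t invoker,          /* runtime vs. program invocation */
        const void *codeptr_ra);         /* return address into user code */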
@@ -1508,27 +1539,25 @@
ompt_lw_taskteam_t lw_taskteam;
- if (ompt_enabled) {
- __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
- ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
+ if (ompt_enabled.enabled) {
+ __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
+ &ompt_parallel_data, return_address);
exit_runtime_p =
&(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+      // don't use lw_taskteam after linking; its content was swapped
-#if OMPT_TRACE
/* OMPT implicit task begin */
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- my_parallel_id, my_task_id);
+ implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
}
-#endif
/* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
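
TR4 also folds the old implicit_task_begin/implicit_task_end event pair into
one callback keyed by a scope endpoint, which is why these call sites pass
ompt_scope_begin and ompt_scope_end; the thread-state enum is likewise
renamed from ompt_state_* to omp_state_*. The callback shape implied by the
calls above (enum values per TR4):

    typedef enum ompt_scope_endpoint_t {
      ompt_scope_begin = 1,
      ompt_scope_end = 2
    } ompt_scope_endpoint_t;

    typedef void (*ompt_callback_implicit_task_t)(
        ompt_scope_endpoint_t endpoint,
        ompt_data_t *parallel_data, /* NULL at scope_end: team may be gone */
        ompt_data_t *task_data,
        unsigned int team_size,
        unsigned int thread_num);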
@@ -1547,34 +1576,27 @@
#if OMPT_SUPPORT
*exit_runtime_p = NULL;
- if (ompt_enabled) {
-#if OMPT_TRACE
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- ompt_parallel_id, ompt_task_id);
+ if (ompt_enabled.enabled) {
+ OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, implicit_task_data, 1,
+ __kmp_tid_from_gtid(gtid));
}
-
__ompt_lw_taskteam_unlink(master_th);
- // reset clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-#endif
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
+ OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
return TRUE;
}
parent_team->t.t_pkfn = microtask;
-#if OMPT_SUPPORT
- parent_team->t.ompt_team_info.microtask = unwrapped_task;
-#endif
parent_team->t.t_invoke = invoker;
KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
parent_team->t.t_active_level++;
@@ -1726,28 +1748,27 @@
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
+ ompt_task_info_t *task_info;
ompt_lw_taskteam_t lw_taskteam;
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
- exit_runtime_p =
- &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
+ &ompt_parallel_data, return_address);
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+        // don't use lw_taskteam after linking; its content was swapped
-#if OMPT_TRACE
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- ompt_parallel_id, my_task_id);
+ task_info = OMPT_CUR_TASK_INFO(master_th);
+ exit_runtime_p = &(task_info->frame.exit_runtime_frame);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
}
-#endif
/* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1766,26 +1787,21 @@
}
#if OMPT_SUPPORT
- *exit_runtime_p = NULL;
- if (ompt_enabled) {
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
-#if OMPT_TRACE
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- ompt_parallel_id, ompt_task_id);
+ if (ompt_enabled.enabled) {
+ exit_runtime_p = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), 1,
+ __kmp_tid_from_gtid(gtid));
}
-#endif
__ompt_lw_taskteam_unlink(master_th);
- // reset clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
+ OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
} else if (microtask == (microtask_t)__kmp_teams_master) {
@@ -1834,30 +1850,28 @@
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
+ ompt_task_info_t *task_info;
ompt_lw_taskteam_t lw_taskteam;
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
- unwrapped_task, ompt_parallel_id);
- lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
- exit_runtime_p =
- &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
+ &ompt_parallel_data, return_address);
+ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
+        // don't use lw_taskteam after linking; its content was swapped
+ task_info = OMPT_CUR_TASK_INFO(master_th);
+ exit_runtime_p = &(task_info->frame.exit_runtime_frame);
- __ompt_lw_taskteam_link(&lw_taskteam, master_th);
-
-#if OMPT_TRACE
/* OMPT implicit task begin */
- my_task_id = lw_taskteam.ompt_task_info.task_id;
- my_parallel_id = ompt_parallel_id;
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
- my_parallel_id, my_task_id);
+ implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
+ implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
}
-#endif
/* OMPT state */
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1875,26 +1889,22 @@
}
#if OMPT_SUPPORT
- *exit_runtime_p = NULL;
- if (ompt_enabled) {
-#if OMPT_TRACE
- lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
-
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- my_parallel_id, my_task_id);
+ if (ompt_enabled.enabled) {
+ *exit_runtime_p = NULL;
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), 1,
+ __kmp_tid_from_gtid(gtid));
}
-#endif
+ ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
__ompt_lw_taskteam_unlink(master_th);
- // reset clear the task id only after unlinking the task
- lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
-
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ &ompt_parallel_data, parent_task_data,
+ OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
#if OMP_40_ENABLED
@@ -1902,14 +1912,13 @@
#endif /* OMP_40_ENABLED */
} else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
- ompt_lw_taskteam_t *lwt =
- (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
- __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
- ompt_parallel_id);
+ ompt_lw_taskteam_t lwt;
+ __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
+ return_address);
- lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
- lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
- __ompt_lw_taskteam_link(lwt, master_th);
+ lwt.ompt_task_info.frame.exit_runtime_frame = NULL;
+ __ompt_lw_taskteam_link(&lwt, master_th, 1);
+// don't use lw_taskteam after linking; its content was swapped
#endif
// we were called from GNU native code
@@ -2004,7 +2013,7 @@
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
- ompt_parallel_id,
+ ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@@ -2015,7 +2024,7 @@
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
- ompt_parallel_id,
+ ompt_parallel_data,
#endif
#if OMP_40_ENABLED
proc_bind,
@@ -2033,7 +2042,8 @@
KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
- KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
+ KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
+ return_address);
#endif
KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
// TODO: parent_team->t.t_level == INT_MAX ???
@@ -2167,7 +2177,7 @@
&master_th->th.th_current_task->td_icvs, loc);
#if OMPT_SUPPORT
- master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
+ master_th->th.ompt_thread_info.state = omp_state_work_parallel;
#endif
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -2251,8 +2261,8 @@
KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
- if (ompt_enabled) {
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ if (ompt_enabled.enabled) {
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@@ -2264,17 +2274,18 @@
kmp_team_t *team) {
// restore state outside the region
thread->th.ompt_thread_info.state =
- ((team->t.t_serialized) ? ompt_state_work_serial
- : ompt_state_work_parallel);
+ ((team->t.t_serialized) ? omp_state_work_serial
+ : omp_state_work_parallel);
}
-static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
- ompt_parallel_id_t parallel_id,
- fork_context_e fork_context) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
- ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
- parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
+static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
+ kmp_team_t *team, ompt_data_t *parallel_data,
+ fork_context_e fork_context, void *codeptr) {
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ if (ompt_enabled.ompt_callback_parallel_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
+ parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
+ codeptr);
}
task_info->frame.reenter_runtime_frame = NULL;
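
parallel_end mirrors parallel_begin minus the frame and team size; the
encountering task's data and the fork-time codeptr are reported back, which
is why __kmp_join_ompt now threads codeptr through. Per TR4:

    typedef void (*ompt_callback_parallel_end_t)(
        ompt_data_t *parallel_data,
        ompt_data_t *task_data, /* the encountering (parent) task */
        ompt_invoker_t invoker,
        const void *codeptr_ra);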
@@ -2311,8 +2322,8 @@
master_th->th.th_ident = loc;
#if OMPT_SUPPORT
- if (ompt_enabled) {
- master_th->th.ompt_thread_info.state = ompt_state_overhead;
+ if (ompt_enabled.enabled) {
+ master_th->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@@ -2349,7 +2360,7 @@
__kmpc_end_serialized_parallel(loc, gtid);
#if OMPT_SUPPORT
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
__kmp_join_restore_state(master_th, parent_team);
}
#endif
@@ -2377,7 +2388,8 @@
KMP_MB();
#if OMPT_SUPPORT
- ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
+ ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
+ void *codeptr = team->t.ompt_team_info.master_return_address;
#endif
#if USE_ITT_BUILD
@@ -2449,8 +2461,9 @@
}
#if OMPT_SUPPORT
- if (ompt_enabled) {
- __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+ if (ompt_enabled.enabled) {
+ __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+ codeptr);
}
#endif
@@ -2479,15 +2492,18 @@
}
KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- parallel_id, task_info->task_id);
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ int ompt_team_size = team->t.t_nproc;
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
+ __kmp_tid_from_gtid(gtid));
}
+
task_info->frame.exit_runtime_frame = NULL;
- task_info->task_id = 0;
+ task_info->task_data = ompt_data_none;
}
#endif
@@ -2558,8 +2574,9 @@
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if OMPT_SUPPORT
- if (ompt_enabled) {
- __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
+ if (ompt_enabled.enabled) {
+ __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
+ codeptr);
}
#endif
@@ -3154,7 +3171,7 @@
1, // new_nproc
1, // max_nproc
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
@@ -3195,7 +3212,7 @@
1, // new_nproc
__kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
@@ -3734,6 +3751,9 @@
__kmp_print_thread_storage_map(root_thread, gtid);
}
root_thread->th.th_info.ds.ds_gtid = gtid;
+#if OMPT_SUPPORT
+ root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
+#endif
root_thread->th.th_root = root;
if (__kmp_env_consistency_check) {
root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
@@ -3756,7 +3776,7 @@
root_thread->th.th_serial_team =
__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
@@ -3826,6 +3846,29 @@
__kmp_root_counter++;
+#if OMPT_SUPPORT
+ if (!initial_thread && ompt_enabled.enabled) {
+
+ ompt_thread_t *root_thread = ompt_get_thread();
+
+ ompt_set_thread_state(root_thread, omp_state_overhead);
+
+ if (ompt_enabled.ompt_callback_thread_begin) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+ ompt_thread_initial, __ompt_get_thread_data_internal());
+ }
+ ompt_data_t *task_data;
+ __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
+ if (ompt_enabled.ompt_callback_task_create) {
+ ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+ NULL, NULL, task_data, ompt_task_initial, 0, NULL);
+ // initial task has nothing to return to
+ }
+
+ ompt_set_thread_state(root_thread, omp_state_work_serial);
+ }
+#endif
+
KMP_MB();
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
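
The block above announces the initial thread (thread_begin) and its initial
task via a task_create with ompt_task_initial and no parent or return
address. The shapes assumed by these calls (the task-type parameter is
spelled per this runtime's TR4 header; later drafts turned it into flags):

    typedef void (*ompt_callback_thread_begin_t)(
        ompt_thread_type_t thread_type, /* ompt_thread_initial here */
        ompt_data_t *thread_data);

    typedef void (*ompt_callback_thread_end_t)(ompt_data_t *thread_data);

    typedef void (*ompt_callback_task_create_t)(
        ompt_data_t *parent_task_data,    /* NULL for the initial task */
        const ompt_frame_t *parent_frame, /* NULL for the initial task */
        ompt_data_t *new_task_data,
        int type,                /* ompt_task_initial at this call site */
        int has_dependences,     /* 0 here */
        const void *codeptr_ra); /* NULL: nothing to return to */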
@@ -3909,9 +3952,9 @@
#endif /* KMP_OS_WINDOWS */
#if OMPT_SUPPORT
- if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
- int gtid = __kmp_get_gtid();
- __ompt_thread_end(ompt_thread_initial, gtid);
+ if (ompt_enabled.ompt_callback_thread_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
+ &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
}
#endif
@@ -3961,7 +4004,7 @@
if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
// the runtime is shutting down so we won't report any events
- thread->th.ompt_thread_info.state = ompt_state_undefined;
+ thread->th.ompt_thread_info.state = omp_state_undefined;
#endif
__kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
}
@@ -4282,7 +4325,7 @@
new_thr->th.th_serial_team = serial_team =
(kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
- 0, // root parallel id
+ ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
@@ -4813,7 +4856,7 @@
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id,
+ ompt_data_t ompt_parallel_data,
#endif
#if OMP_40_ENABLED
kmp_proc_bind_t new_proc_bind,
@@ -5180,7 +5223,7 @@
#endif
#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
+ __ompt_team_assign_id(team, ompt_parallel_data);
#endif
KMP_MB();
@@ -5232,7 +5275,7 @@
team->t.t_id));
#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
+ __ompt_team_assign_id(team, ompt_parallel_data);
#endif
KMP_MB();
@@ -5296,7 +5339,7 @@
#endif
#if OMPT_SUPPORT
- __ompt_team_assign_id(team, ompt_parallel_id);
+ __ompt_team_assign_id(team, ompt_parallel_data);
team->t.ompt_serialized_team_info = NULL;
#endif
@@ -5563,16 +5606,26 @@
}
#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ ompt_data_t *thread_data;
+ if (ompt_enabled.enabled) {
+ thread_data = &(this_thr->th.ompt_thread_info.thread_data);
+ thread_data->ptr = NULL;
+
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
this_thr->th.ompt_thread_info.wait_id = 0;
- this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
- if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
- __ompt_thread_begin(ompt_thread_worker, gtid);
+ this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
+ if (ompt_enabled.ompt_callback_thread_begin) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
+ ompt_thread_worker, thread_data);
}
}
#endif
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ this_thr->th.ompt_thread_info.state = omp_state_idle;
+ }
+#endif
/* This is the place where threads wait for work */
while (!TCR_4(__kmp_global.g.g_done)) {
KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
@@ -5581,18 +5634,12 @@
/* wait for work to do */
KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_idle;
- }
-#endif
-
/* No tid yet since not part of a team */
__kmp_fork_barrier(gtid, KMP_GTID_DNE);
#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
+ if (ompt_enabled.enabled) {
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
}
#endif
@@ -5600,14 +5647,6 @@
/* have we been allocated? */
if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
-#if OMPT_SUPPORT
- ompt_task_info_t *task_info;
- ompt_parallel_id_t my_parallel_id;
- if (ompt_enabled) {
- task_info = __ompt_get_taskinfo(0);
- my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
- }
-#endif
/* we were just woken up, so run our new task */
if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
int rc;
@@ -5619,11 +5658,8 @@
updateHWFPControl(*pteam);
#if OMPT_SUPPORT
- if (ompt_enabled) {
- this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
- // Initialize OMPT task id for implicit task.
- int tid = __kmp_tid_from_gtid(gtid);
- task_info->task_id = __ompt_task_id_new(tid);
+ if (ompt_enabled.enabled) {
+ this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
}
#endif
@@ -5634,40 +5670,29 @@
}
KMP_ASSERT(rc);
-#if OMPT_SUPPORT
- if (ompt_enabled) {
- /* no frame set while outside task */
- task_info->frame.exit_runtime_frame = NULL;
-
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- }
-#endif
KMP_MB();
KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
(*pteam)->t.t_pkfn));
}
- /* join barrier after parallel region */
- __kmp_join_barrier(gtid);
-#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
- // don't access *pteam here: it may have already been freed
- // by the master thread behind the barrier (possible race)
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
- my_parallel_id, task_info->task_id);
- }
- task_info->frame.exit_runtime_frame = NULL;
- task_info->task_id = 0;
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ /* no frame set while outside task */
+ __ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL;
+
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
}
#endif
+ /* join barrier after parallel region */
+ __kmp_join_barrier(gtid);
}
}
TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
#if OMPT_SUPPORT
- if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
- __ompt_thread_end(ompt_thread_worker, gtid);
+ if (ompt_enabled.ompt_callback_thread_end) {
+ ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
}
#endif
@@ -6925,26 +6950,27 @@
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
+ ompt_data_t *my_task_data;
+ ompt_data_t *my_parallel_data;
+ int ompt_team_size;
- if (ompt_enabled) {
+ if (ompt_enabled.enabled) {
exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
.ompt_task_info.frame.exit_runtime_frame);
} else {
exit_runtime_p = &dummy;
}
-#if OMPT_TRACE
- my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
- my_parallel_id = team->t.ompt_team_info.parallel_id;
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
- my_task_id);
+ my_task_data =
+ &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
+ my_parallel_data = &(team->t.ompt_team_info.parallel_data);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_team_size = team->t.t_nproc;
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
+ __kmp_tid_from_gtid(gtid));
}
#endif
-#endif
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
@@ -6991,9 +7017,6 @@
SSC_MARK_FORKING();
#endif
__kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
-#if OMPT_SUPPORT
- (void *)thr->th.th_teams_microtask, // "unwrapped" task
-#endif
(microtask_t)thr->th.th_teams_microtask, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
@@ -7170,6 +7193,36 @@
#endif /* KMP_DEBUG */
__kmp_join_barrier(gtid); /* wait for everyone */
+#if OMPT_SUPPORT
+ int ds_tid = this_thr->th.th_info.ds.ds_tid;
+ if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+ ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr);
+ ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr);
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+#if OMPT_OPTIONAL
+ void *codeptr = NULL;
+ if (KMP_MASTER_TID(ds_tid) &&
+ (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
+ codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
+
+ if (ompt_enabled.ompt_callback_sync_region_wait) {
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
+ ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+ }
+ if (ompt_enabled.ompt_callback_sync_region) {
+ ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
+ ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr);
+ }
+#endif
+ if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, NULL, tId, 0, ds_tid);
+ }
+ // return to idle state
+ this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ }
+#endif
KMP_MB(); /* Flush all pending memory write invalidates. */
KMP_ASSERT(this_thr->th.th_team == team);
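
The barrier epilogue above reports the end of the implicit barrier through
the paired sync_region / sync_region_wait callbacks (the _wait variant
brackets only the blocking portion) before ending the worker's implicit
task. Both share one signature (type names per TR4):

    typedef void (*ompt_callback_sync_region_t)(
        ompt_sync_region_kind_t kind,   /* ompt_sync_region_barrier here */
        ompt_scope_endpoint_t endpoint, /* ompt_scope_end at this site */
        ompt_data_t *parallel_data,
        ompt_data_t *task_data,
        const void *codeptr_ra); /* master's return address; NULL on workers */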