For your Christmas hacking pleasure.
This release aligns with Intel(r) Composer XE 2013 SP1 Product Update 2
New features
* The library can now be built with clang (though with some
limitations since clang does not support 128 bit floats)
* Support for Vtune analysis of load imbalance
* Code contribution from Steven Noonan to build the runtime for ARM*
architecture processors
* First implementation of runtime API for OpenMP cancellation
Bug Fixes
* Fixed hang on Windows (only) when using KMP_BLOCKTIME=0
llvm-svn: 197914
diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c
index ea5cdc0..8cac009 100644
--- a/openmp/runtime/src/kmp_tasking.c
+++ b/openmp/runtime/src/kmp_tasking.c
@@ -1,7 +1,7 @@
/*
* kmp_tasking.c -- OpenMP 3.0 tasking support.
- * $Revision: 42522 $
- * $Date: 2013-07-16 05:28:49 -0500 (Tue, 16 Jul 2013) $
+ * $Revision: 42852 $
+ * $Date: 2013-12-04 10:50:49 -0600 (Wed, 04 Dec 2013) $
*/
@@ -620,13 +620,28 @@
#if OMP_40_ENABLED
if ( taskdata->td_taskgroup )
KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
- __kmp_release_deps(gtid,taskdata);
+ __kmp_release_deps(gtid,taskdata);
#endif
}
KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
gtid, taskdata, children) );
+#if OMP_40_ENABLED
+ /* If the task's destructor thunk flag has been set, we need to invoke the
+ destructor thunk that has been generated by the compiler.
+ The code is placed here, since at this point other tasks might have been released
+ hence overlapping the destructor invocations with some other work in the
+ released tasks. The OpenMP spec is not specific on when the destructors are
+ invoked, so we should be free to choose.
+ */
+ if (taskdata->td_flags.destructors_thunk) {
+ kmp_routine_entry_t destr_thunk = task->destructors;
+ KMP_ASSERT(destr_thunk);
+ destr_thunk(gtid, task);
+ }
+#endif // OMP_40_ENABLED
+
// bookkeeping for resuming task:
// GEH - note tasking_ser => task_serial
KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
@@ -739,10 +754,10 @@
task->td_flags.complete = 0;
task->td_flags.freed = 0;
-#if OMP_40_ENABLED
+#if OMP_40_ENABLED
task->td_dephash = NULL;
task->td_depnode = NULL;
-#endif
+#endif
if (set_curr_task) { // only do this initialization the first time a thread is created
task->td_incomplete_child_tasks = 0;
@@ -850,7 +865,7 @@
taskdata->td_task_id = KMP_GEN_TASK_ID();
taskdata->td_team = team;
- taskdata->td_alloc_thread = thread;
+ taskdata->td_alloc_thread = thread;
taskdata->td_parent = parent_task;
taskdata->td_level = parent_task->td_level + 1; // increment nesting level
taskdata->td_ident = loc_ref;
@@ -863,6 +878,9 @@
taskdata->td_flags.tiedness = flags->tiedness;
taskdata->td_flags.final = flags->final;
taskdata->td_flags.merged_if0 = flags->merged_if0;
+#if OMP_40_ENABLED
+ taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
+#endif // OMP_40_ENABLED
taskdata->td_flags.tasktype = TASK_EXPLICIT;
// GEH - TODO: fix this to copy parent task's value of tasking_ser flag
@@ -890,7 +908,7 @@
taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
taskdata->td_dephash = NULL;
taskdata->td_depnode = NULL;
-#endif
+#endif
// Only need to keep track of child task counts if team parallel and tasking not serialized
if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
@@ -946,24 +964,46 @@
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+#if OMP_40_ENABLED
+ int discard = 0 /* false */;
+#endif
KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
gtid, taskdata, current_task) );
__kmp_task_start( gtid, task, current_task );
+#if OMP_40_ENABLED
+ // TODO: cancel tasks if the parallel region has also been cancelled
+ // TODO: check if this sequence can be hoisted above __kmp_task_start
+ // if cancellation has been enabled for this run ...
+ if (__kmp_omp_cancellation) {
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+ kmp_team_t * this_team = this_thr->th.th_team;
+ kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
+ if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
+ // this task belongs to a task group and we need to cancel it
+ discard = 1 /* true */;
+ }
+ }
+
//
// Invoke the task routine and pass in relevant data.
// Thunks generated by gcc take a different argument list.
//
+ if (!discard) {
+#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
- if (taskdata->td_flags.native) {
- ((void (*)(void *))(*(task->routine)))(task->shareds);
- }
- else
+ if (taskdata->td_flags.native) {
+ ((void (*)(void *))(*(task->routine)))(task->shareds);
+ }
+ else
#endif /* KMP_GOMP_COMPAT */
- {
- (*(task->routine))(gtid, task);
+ {
+ (*(task->routine))(gtid, task);
+ }
+#if OMP_40_ENABLED
}
+#endif // OMP_40_ENABLED
__kmp_task_finish( gtid, task, current_task );
@@ -1079,10 +1119,8 @@
// GEH: if team serialized, avoid reading the volatile variable below.
while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
__kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
- 0, FALSE, &thread_finished,
-#if USE_ITT_BUILD
- itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ 0, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint );
}
}
@@ -1134,10 +1172,8 @@
__kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */
if ( ! taskdata->td_flags.team_serial ) {
- __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished,
-#if USE_ITT_BUILD
- itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint );
}
@@ -1162,7 +1198,7 @@
// __kmpc_taskgroup: Start a new taskgroup
void
-__kmpc_taskgroup( ident* loc, int gtid )
+__kmpc_taskgroup( ident_t* loc, int gtid )
{
kmp_info_t * thread = __kmp_threads[ gtid ];
kmp_taskdata_t * taskdata = thread->th.th_current_task;
@@ -1170,6 +1206,7 @@
(kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
tg_new->count = 0;
+ tg_new->cancel_request = cancel_noreq;
tg_new->parent = taskdata->td_taskgroup;
taskdata->td_taskgroup = tg_new;
}
@@ -1180,7 +1217,7 @@
// and its descendants are complete
void
-__kmpc_end_taskgroup( ident* loc, int gtid )
+__kmpc_end_taskgroup( ident_t* loc, int gtid )
{
kmp_info_t * thread = __kmp_threads[ gtid ];
kmp_taskdata_t * taskdata = thread->th.th_current_task;
@@ -1201,10 +1238,8 @@
if ( ! taskdata->td_flags.team_serial ) {
while ( TCR_4(taskgroup->count) != 0 ) {
__kmp_execute_tasks( thread, gtid, &(taskgroup->count),
- 0, FALSE, &thread_finished,
-#if USE_ITT_BUILD
- itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ 0, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint );
}
}
@@ -1420,15 +1455,13 @@
// checker is the value to check to terminate the spin.
int
-__kmp_execute_tasks( kmp_info_t *thread,
- kmp_int32 gtid,
+__kmp_execute_tasks( kmp_info_t *thread,
+ kmp_int32 gtid,
volatile kmp_uint *spinner,
kmp_uint checker,
- int final_spin,
- int *thread_finished,
-#if USE_ITT_BUILD
- void * itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj),
kmp_int32 is_constrained )
{
kmp_task_team_t * task_team;
@@ -2297,11 +2330,9 @@
// in team > 1 !
void
-__kmp_task_team_wait( kmp_info_t *this_thr,
+__kmp_task_team_wait( kmp_info_t *this_thr,
kmp_team_t *team
-#if USE_ITT_BUILD
- , void * itt_sync_obj
-#endif /* USE_ITT_BUILD */
+ USE_ITT_BUILD_ARG(void * itt_sync_obj)
)
{
kmp_task_team_t *task_team = team->t.t_task_team;
@@ -2320,9 +2351,7 @@
// termination condition.
//
__kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
-#if USE_ITT_BUILD
- , itt_sync_obj
-#endif /* USE_ITT_BUILD */
+ USE_ITT_BUILD_ARG(itt_sync_obj)
);
//
@@ -2361,7 +2390,8 @@
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
#endif /* USE_ITT_BUILD */
- while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
+ while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag
+ USE_ITT_BUILD_ARG(NULL), 0 ) ) {
#if USE_ITT_BUILD
// TODO: What about itt_sync_obj??
KMP_FSYNC_SPIN_PREPARE( spin );