/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif


/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    if (!flag) return;
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( (entries & TASK_STACK_INDEX_MASK) == 0 )   // parenthesized: & binds looser than ==
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top);

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s):            gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//     after a thread_data structure is created.
//     It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_info_t *thread = __kmp_thread_from_gtid( gtid );   // owning thread, needed for the free call below
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )  // parenthesized: & binds looser than ==
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
             (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
              __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block   -> sb_prev = stack_block;
            new_block   -> sb_next = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )  // parenthesized: & binds looser than ==
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
// __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
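
// Note: the deque above is a fixed-size ring buffer.  Assuming the usual power-of-two
// configuration (TASK_DEQUE_SIZE = 1 << TASK_DEQUE_BITS, TASK_DEQUE_MASK = TASK_DEQUE_SIZE - 1),
// the wrap arithmetic behaves as in this illustrative sketch, which is not additional runtime code:
//
//     // with TASK_DEQUE_SIZE == 256 and tail == 255:
//     tail = ( tail + 1 ) & TASK_DEQUE_MASK;    // (255 + 1) & 0xFF == 0 -- index wraps to the start
//
// Only the owning thread pushes at td_deque_tail; other threads steal from td_deque_head,
// which is why the push only needs the per-deque bootstrap lock plus the TCR/TCW wrappers
// around td_deque_ntasks.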


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // The thread's current task is the parent of the just-created implicit tasks of the new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if tasking serialized
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
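
// Note: two counters drive the lifetime scheme above; the summary below is an
// illustration of the protocol, not additional runtime code:
//
//   td_incomplete_child_tasks - number of children that have not finished executing;
//                               taskwait / taskgroup spin on this counter.
//   td_allocated_child_tasks  - number of children (plus one for the task itself) whose
//                               taskdata still references this block; the block is freed
//                               only when this count reaches zero, so a parent that has
//                               finished executing may be kept alive until its last
//                               child has been freed.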

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag will be reset to 1 again by this same function.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // FIXME johnmc: I think this statement should come before the previous one so that if an
    // asynchronous inquiry peers into the runtime system it doesn't see the freed
    // task as the current task.
    __kmp_threads[ gtid ] -> th.th_current_task = resumed_task;  // restore current_task

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task. This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//  task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy    = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started   = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete  = 0;
    task->td_flags.freed     = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a power of two specified by val
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val
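
// Note: a small worked example of the rounding above (illustration only).  With
// val == 8 (pointer alignment on a 64-bit target), a size with low bits set is masked
// down and then bumped to the next multiple; an already-aligned size is returned unchanged:
//
//     __kmp_round_up_to_val( 52, 8 ) == 56     // 52 -> 48 -> 56
//     __kmp_round_up_to_val( 56, 8 ) == 56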


//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
               setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;      // AC: Always start with 0 part id

    taskdata->td_task_id      = KMP_GEN_TASK_ID();
    taskdata->td_team         = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent       = parent_task;
    taskdata->td_level        = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_ident        = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
       copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started     = 0;
    taskdata->td_flags.executing   = 0;
    taskdata->td_flags.complete    = 0;
    taskdata->td_flags.freed       = 0;

    taskdata->td_flags.native      = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}
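
// Note: the single allocation made above is laid out roughly as follows (an
// illustration only; the exact padding comes from __kmp_round_up_to_val):
//
//     +-----------------+--------------------------+---------+-----------+
//     | kmp_taskdata_t  | kmp_task_t (+ privates)  | padding | shareds   |
//     +-----------------+--------------------------+---------+-----------+
//     ^ taskdata        ^ task = KMP_TASKDATA_TO_TASK(taskdata)
//                                                            ^ task->shareds = (char *)taskdata + shareds_offset
//
// KMP_TASK_TO_TASKDATA / KMP_TASKDATA_TO_TASK convert between the two adjacent headers,
// which is why the alignment asserts above check both pointers.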


kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}

//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
    __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
        KMP_COUNT_BLOCK(TASK_executed);
        KMP_TIME_BLOCK (TASK_execution);
#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED


#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_finish( gtid, task, current_task );

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}
1181
1182//-----------------------------------------------------------------------
1183// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1184//
1185// loc_ref: location of original task pragma (ignored)
1186// gtid: Global Thread ID of encountering thread
1187// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1188// Returns:
1189// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1190// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1191
1192kmp_int32
1193__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1194{
1195 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1196
1197 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1198 gtid, loc_ref, new_taskdata ) );
1199
1200 /* Should we execute the new task or queue it? For now, let's just always try to
1201 queue it. If the queue fills up, then we'll execute it. */
1202
1203 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1204 { // Execute this task immediately
1205 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1206 new_taskdata->td_flags.task_serial = 1;
1207 __kmp_invoke_task( gtid, new_task, current_task );
1208 }
1209
1210 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1211 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1212 new_taskdata ) );
1213
1214 return TASK_CURRENT_NOT_QUEUED;
1215}
1216
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001217//---------------------------------------------------------------------
1218// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1219// gtid: Global Thread ID of encountering thread
1220// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1221// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1222// returns:
1223//
1224// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1225// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1226kmp_int32
1227__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1228{
1229 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1230
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001231#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001232 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001233 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1234 __builtin_frame_address(0);
1235 }
1236#endif
1237
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001238 /* Should we execute the new task or queue it? For now, let's just always try to
1239 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001240#if OMP_41_ENABLED
1241 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1242#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001243 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001244#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001245 { // Execute this task immediately
1246 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1247 if ( serialize_immediate )
1248 new_taskdata -> td_flags.task_serial = 1;
1249 __kmp_invoke_task( gtid, new_task, current_task );
1250 }
1251
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001252#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001253 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001254 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1255 }
1256#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001257
1258 return TASK_CURRENT_NOT_QUEUED;
1259}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001260
1261//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001262// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1263// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001264// loc_ref: location of original task pragma (ignored)
1265// gtid: Global Thread ID of encountering thread
1266// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1267// returns:
1268//
1269// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1270// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1271
1272kmp_int32
1273__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1274{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001275 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001276
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001277#if KMP_DEBUG
1278 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1279#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001280 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1281 gtid, loc_ref, new_taskdata ) );
1282
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001283 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001284
1285 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1286 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001287 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001288}
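/*
 * Illustrative only (not part of this file's logic): a compiler lowering of
 *
 *     #pragma omp task
 *     foo(x);
 *
 * is roughly
 *
 *     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, flags,
 *                                            sizeof_kmp_task_t, sizeof_shareds,
 *                                            &outlined_task_entry );
 *     ... copy firstprivate data into the task's private/shared areas ...
 *     __kmpc_omp_task( &loc, gtid, t );
 *
 * The exact allocation entry point and argument layout are the compiler's
 * responsibility; the names above are indicative, not normative.
 */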
1289
Jim Cownie5e8470a2013-09-27 10:38:44 +00001290//-------------------------------------------------------------------------------------
1291// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1292
1293kmp_int32
1294__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1295{
1296 kmp_taskdata_t * taskdata;
1297 kmp_info_t * thread;
1298 int thread_finished = FALSE;
1299
Jonathan Peyton54127982015-11-04 21:37:48 +00001300 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001301
1302 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1303 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1304
1305 thread = __kmp_threads[ gtid ];
1306 taskdata = thread -> th.th_current_task;
1307#if USE_ITT_BUILD
1308 // Note: These values are used by ITT events as well.
1309#endif /* USE_ITT_BUILD */
1310 taskdata->td_taskwait_counter += 1;
1311 taskdata->td_taskwait_ident = loc_ref;
1312 taskdata->td_taskwait_thread = gtid + 1;
1313
1314#if USE_ITT_BUILD
1315 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1316 if ( itt_sync_obj != NULL )
1317 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1318#endif /* USE_ITT_BUILD */
1319
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001320#if OMP_41_ENABLED
1321 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1322#else
1323 if ( ! taskdata->td_flags.team_serial )
1324#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001325 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001326 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001327 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001328 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001329 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1330 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001331 }
1332 }
1333#if USE_ITT_BUILD
1334 if ( itt_sync_obj != NULL )
1335 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1336#endif /* USE_ITT_BUILD */
1337
1338 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1339 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1340 }
1341
1342 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1343 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1344
1345 return TASK_CURRENT_NOT_QUEUED;
1346}
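/*
 * Illustrative only: "#pragma omp taskwait" typically lowers to a single call,
 *
 *     __kmpc_omp_taskwait( &loc, __kmpc_global_thread_num( &loc ) );
 *
 * after which the encountering thread helps execute child tasks (via the
 * kmp_flag_32 loop above) until td_incomplete_child_tasks reaches zero.
 */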
1347
1348
1349//-------------------------------------------------
1350// __kmpc_omp_taskyield: switch to a different task
1351
1352kmp_int32
1353__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1354{
1355 kmp_taskdata_t * taskdata;
1356 kmp_info_t * thread;
1357 int thread_finished = FALSE;
1358
Jonathan Peyton45be4502015-08-11 21:36:41 +00001359 KMP_COUNT_BLOCK(OMP_TASKYIELD);
1360
Jim Cownie5e8470a2013-09-27 10:38:44 +00001361 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1362 gtid, loc_ref, end_part) );
1363
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001364 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001365 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1366
1367 thread = __kmp_threads[ gtid ];
1368 taskdata = thread -> th.th_current_task;
1369 // Should we model this as a task wait or not?
1370#if USE_ITT_BUILD
1371 // Note: These values are used by ITT events as well.
1372#endif /* USE_ITT_BUILD */
1373 taskdata->td_taskwait_counter += 1;
1374 taskdata->td_taskwait_ident = loc_ref;
1375 taskdata->td_taskwait_thread = gtid + 1;
1376
1377#if USE_ITT_BUILD
1378 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1379 if ( itt_sync_obj != NULL )
1380 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1381#endif /* USE_ITT_BUILD */
1382 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001383 kmp_task_team_t * task_team = thread->th.th_task_team;
1384 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001385 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001386 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1387 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1388 }
1389 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001391#if USE_ITT_BUILD
1392 if ( itt_sync_obj != NULL )
1393 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1394#endif /* USE_ITT_BUILD */
1395
1396 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1397 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1398 }
1399
1400 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1401 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1402
1403 return TASK_CURRENT_NOT_QUEUED;
1404}
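// Note: taskyield is only a scheduling hint. If the runtime is in
// immediate-execution mode, the parallel runtime is not yet initialized, the
// team is serialized, or no task team with tasking enabled exists, the call
// performs at most the bookkeeping above and returns without running any
// other task.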
1405
1406
1407#if OMP_40_ENABLED
1408//-------------------------------------------------------------------------------------
1409// __kmpc_taskgroup: Start a new taskgroup
1410
1411void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001412__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001413{
1414 kmp_info_t * thread = __kmp_threads[ gtid ];
1415 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1416 kmp_taskgroup_t * tg_new =
1417 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1418 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1419 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001420 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421 tg_new->parent = taskdata->td_taskgroup;
1422 taskdata->td_taskgroup = tg_new;
1423}
1424
1425
1426//-------------------------------------------------------------------------------------
1427// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1428// and its descendants are complete
1429
1430void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001431__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001432{
1433 kmp_info_t * thread = __kmp_threads[ gtid ];
1434 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1435 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1436 int thread_finished = FALSE;
1437
1438 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1439 KMP_DEBUG_ASSERT( taskgroup != NULL );
1440
1441 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1442#if USE_ITT_BUILD
1443 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1444 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1445 if ( itt_sync_obj != NULL )
1446 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1447#endif /* USE_ITT_BUILD */
1448
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001449#if OMP_41_ENABLED
1450 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1451#else
1452 if ( ! taskdata->td_flags.team_serial )
1453#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001454 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001455 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001456 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001457 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1458 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001459 }
1460 }
1461
1462#if USE_ITT_BUILD
1463 if ( itt_sync_obj != NULL )
1464 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1465#endif /* USE_ITT_BUILD */
1466 }
1467 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1468
1469 // Restore parent taskgroup for the current task
1470 taskdata->td_taskgroup = taskgroup->parent;
1471 __kmp_thread_free( thread, taskgroup );
1472
1473 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1474}
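/*
 * Illustrative pairing, as typically emitted for "#pragma omp taskgroup":
 *
 *     __kmpc_taskgroup( &loc, gtid );
 *     ... task-generating code; tasks created here are accounted for in the
 *         innermost taskgroup's count ...
 *     __kmpc_end_taskgroup( &loc, gtid );  // executes/waits until count == 0
 */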
1475#endif
1476
1477
1478//------------------------------------------------------
1479// __kmp_remove_my_task: remove a task from my own deque
1480
1481static kmp_task_t *
1482__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1483 kmp_int32 is_constrained )
1484{
1485 kmp_task_t * task;
1486 kmp_taskdata_t * taskdata;
1487 kmp_thread_data_t *thread_data;
1488 kmp_uint32 tail;
1489
1490 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1491 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1492
1493 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1494
1495 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1496 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1497 thread_data->td.td_deque_tail) );
1498
1499 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1500 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1501 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1502 thread_data->td.td_deque_tail) );
1503 return NULL;
1504 }
1505
1506 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1507
1508 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1509 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1510 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1511 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1512 thread_data->td.td_deque_tail) );
1513 return NULL;
1514 }
1515
1516 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1517 taskdata = thread_data -> td.td_deque[ tail ];
1518
1519 if (is_constrained) {
1520 // we need to check if the candidate obeys task scheduling constraint:
1521 // only child of current task can be scheduled
1522 kmp_taskdata_t * current = thread->th.th_current_task;
1523 kmp_int32 level = current->td_level;
1524 kmp_taskdata_t * parent = taskdata->td_parent;
1525 while ( parent != current && parent->td_level > level ) {
1526 parent = parent->td_parent; // check generation up to the level of the current task
1527 KMP_DEBUG_ASSERT(parent != NULL);
1528 }
1529 if ( parent != current ) {
1530        // If the tail task is not a child, then no other children can appear in the deque.
1531 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1532 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1533 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1534 thread_data->td.td_deque_tail) );
1535 return NULL;
1536 }
1537 }
1538
1539 thread_data -> td.td_deque_tail = tail;
1540 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1541
1542 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1543
1544 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1545 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1546 thread_data->td.td_deque_tail) );
1547
1548 task = KMP_TASKDATA_TO_TASK( taskdata );
1549 return task;
1550}
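// Note on the constrained path above: the task scheduling constraint is
// enforced by walking the candidate's td_parent chain until either the current
// task is reached (the candidate is a descendant and may be scheduled) or the
// walk rises above current->td_level (the candidate is not a descendant, and
// then nothing deeper in the deque can be one either, so we give up).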
1551
1552
1553//-----------------------------------------------------------
1554// __kmp_steal_task: remove a task from another thread's deque
1555// Assume that the calling thread has already checked that the
1556// task_team's thread_data exists before calling this routine.
1557
1558static kmp_task_t *
1559__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1560 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1561 kmp_int32 is_constrained )
1562{
1563 kmp_task_t * task;
1564 kmp_taskdata_t * taskdata;
1565 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001566 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001567
1568 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1569
1570 threads_data = task_team -> tt.tt_threads_data;
1571 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1572
1573 victim_tid = victim->th.th_info.ds.ds_tid;
1574 victim_td = & threads_data[ victim_tid ];
1575
1576 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1577 "head=%u tail=%u\n",
1578 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1579 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1580
1581 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1582 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1583 {
1584 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1585 "ntasks=%d head=%u tail=%u\n",
1586 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1587 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1588 return NULL;
1589 }
1590
1591 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1592
1593 // Check again after we acquire the lock
1594 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1595 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1596 {
1597 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1598 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1599 "ntasks=%d head=%u tail=%u\n",
1600 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1601 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1602 return NULL;
1603 }
1604
1605 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1606
1607 if ( !is_constrained ) {
1608 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1609 // Bump head pointer and Wrap.
1610 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1611 } else {
1612 // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1613 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1614 taskdata = victim_td -> td.td_deque[ tail ];
1615 // we need to check if the candidate obeys task scheduling constraint:
1616 // only child of current task can be scheduled
1617 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1618 kmp_int32 level = current->td_level;
1619 kmp_taskdata_t * parent = taskdata->td_parent;
1620 while ( parent != current && parent->td_level > level ) {
1621 parent = parent->td_parent; // check generation up to the level of the current task
1622 KMP_DEBUG_ASSERT(parent != NULL);
1623 }
1624 if ( parent != current ) {
1625            // If the tail task is not a child, then no other children can appear in the deque (?).
1626 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1627 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1628 "ntasks=%d head=%u tail=%u\n",
1629 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1630 task_team, victim_td->td.td_deque_ntasks,
1631 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1632 return NULL;
1633 }
1634 victim_td -> td.td_deque_tail = tail;
1635 }
1636 if (*thread_finished) {
1637 // We need to un-mark this victim as a finished victim. This must be done before
1638 // releasing the lock, or else other threads (starting with the master victim)
1639 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001640 kmp_uint32 count;
1641
1642 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643
1644 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1645 gtid, count + 1, task_team) );
1646
1647 *thread_finished = FALSE;
1648 }
1649 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1650
1651 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1652
Jonathan Peyton45be4502015-08-11 21:36:41 +00001653 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001654 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001655 "ntasks=%d head=%u tail=%u\n",
1656 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1657 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1658 victim_td->td.td_deque_tail) );
1659
1660 task = KMP_TASKDATA_TO_TASK( taskdata );
1661 return task;
1662}
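// Note: an unconstrained steal takes the oldest entry from the victim's head
// (FIFO), whereas the constrained path steals from the tail, like the owner
// does: if any entries satisfying the descendant check exist at all, they sit
// at the tail end of the deque.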
1663
1664
1665//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001666// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001667// is satisfied (return true) or there are none left (return false).
1668// final_spin is TRUE if this is the spin at the release barrier.
1669// thread_finished indicates whether the thread is finished executing all
1670// the tasks it has on its deque, and is at the release barrier.
1671// flag is the flag object (spin location plus termination value) to wait on.
1672// flag == NULL means only execute a single task and return.
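// Informal outline of the template below:
//   1) drain the calling thread's own deque, re-checking the flag after each task;
//   2) once the deque is empty and this is the final spin, decrement
//      tt_unfinished_threads exactly once and re-check the flag;
//   3) retry the victim remembered in td_deque_last_stolen, if any;
//   4) otherwise steal from one randomly chosen victim (retrying the choice if
//      that victim is asleep), restarting from 1) whenever a stolen task puts
//      new work on our own deque;
//   5) return FALSE when no work can be found.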
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001674template <class C>
1675static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1676 int *thread_finished
1677 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001678{
1679 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001680 kmp_thread_data_t * threads_data;
1681 kmp_task_t * task;
1682 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1683 volatile kmp_uint32 * unfinished_threads;
1684 kmp_int32 nthreads, last_stolen, k, tid;
1685
1686 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1687 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1688
1689 task_team = thread -> th.th_task_team;
Jonathan Peyton54127982015-11-04 21:37:48 +00001690 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001691
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001692 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001693 gtid, final_spin, *thread_finished) );
1694
1695 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1696 KMP_DEBUG_ASSERT( threads_data != NULL );
1697
1698 nthreads = task_team -> tt.tt_nproc;
1699 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001700#if OMP_41_ENABLED
1701 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1702#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001703 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001704#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001705 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1706
1707 // Choose tasks from our own work queue.
1708 start:
1709 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1710#if USE_ITT_BUILD && USE_ITT_NOTIFY
1711 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1712 if ( itt_sync_obj == NULL ) {
1713 // we are at fork barrier where we could not get the object reliably
1714 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1715 }
1716 __kmp_itt_task_starting( itt_sync_obj );
1717 }
1718#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1719 __kmp_invoke_task( gtid, task, current_task );
1720#if USE_ITT_BUILD
1721 if ( itt_sync_obj != NULL )
1722 __kmp_itt_task_finished( itt_sync_obj );
1723#endif /* USE_ITT_BUILD */
1724
1725 // If this thread is only partway through the barrier and the condition
1726 // is met, then return now, so that the barrier gather/release pattern can proceed.
1727 // If this thread is in the last spin loop in the barrier, waiting to be
1728        // released, we know that the termination condition will not be satisfied,
1729 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001730 if (flag == NULL || (!final_spin && flag->done_check())) {
1731 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001732 return TRUE;
1733 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001734 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001735 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1736 }
1737
1738 // This thread's work queue is empty. If we are in the final spin loop
1739 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001740#if OMP_41_ENABLED
1741 // The work queue may be empty but there might be proxy tasks still executing
1742 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1743#else
1744 if (final_spin)
1745#endif
1746 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001747 // First, decrement the #unfinished threads, if that has not already
1748 // been done. This decrement might be to the spin location, and
1749 // result in the termination condition being satisfied.
1750 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001751 kmp_uint32 count;
1752
1753 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001754 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001755 gtid, count, task_team) );
1756 *thread_finished = TRUE;
1757 }
1758
1759 // It is now unsafe to reference thread->th.th_team !!!
1760 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1761 // thread to pass through the barrier, where it might reset each thread's
1762 // th.th_team field for the next parallel region.
1763 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001764 if (flag != NULL && flag->done_check()) {
1765 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001766 return TRUE;
1767 }
1768 }
1769
Jonathan Peyton54127982015-11-04 21:37:48 +00001770 if (thread->th.th_task_team == NULL) return FALSE;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001771#if OMP_41_ENABLED
1772 // check if there are other threads to steal from, otherwise go back
1773 if ( nthreads == 1 )
1774 goto start;
1775#endif
1776
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777 // Try to steal from the last place I stole from successfully.
1778 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1779 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1780
1781 if (last_stolen != -1) {
1782 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1783
1784 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1785 thread_finished, is_constrained )) != NULL)
1786 {
1787#if USE_ITT_BUILD && USE_ITT_NOTIFY
1788 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1789 if ( itt_sync_obj == NULL ) {
1790 // we are at fork barrier where we could not get the object reliably
1791 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1792 }
1793 __kmp_itt_task_starting( itt_sync_obj );
1794 }
1795#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1796 __kmp_invoke_task( gtid, task, current_task );
1797#if USE_ITT_BUILD
1798 if ( itt_sync_obj != NULL )
1799 __kmp_itt_task_finished( itt_sync_obj );
1800#endif /* USE_ITT_BUILD */
1801
1802 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001803 if (flag == NULL || (!final_spin && flag->done_check())) {
1804 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805 gtid) );
1806 return TRUE;
1807 }
1808
Jonathan Peyton54127982015-11-04 21:37:48 +00001809 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1811 // If the execution of the stolen task resulted in more tasks being
1812 // placed on our run queue, then restart the whole process.
1813 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001814 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001815 gtid) );
1816 goto start;
1817 }
1818 }
1819
1820 // Don't give priority to stealing from this thread anymore.
1821 threads_data[ tid ].td.td_deque_last_stolen = -1;
1822
1823        // The victim's work queue is empty. If we are in the final spin loop
1824 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001825#if OMP_41_ENABLED
1826 // The work queue may be empty but there might be proxy tasks still executing
1827 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1828#else
1829 if (final_spin)
1830#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001831 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832 // First, decrement the #unfinished threads, if that has not already
1833 // been done. This decrement might be to the spin location, and
1834 // result in the termination condition being satisfied.
1835 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001836 kmp_uint32 count;
1837
1838 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001839 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001840 "task_team=%p\n", gtid, count, task_team) );
1841 *thread_finished = TRUE;
1842 }
1843
1844 // If __kmp_tasking_mode != tskm_immediate_exec
1845 // then it is now unsafe to reference thread->th.th_team !!!
1846 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1847 // thread to pass through the barrier, where it might reset each thread's
1848 // th.th_team field for the next parallel region.
1849 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001850 if (flag != NULL && flag->done_check()) {
1851 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001852 gtid) );
1853 return TRUE;
1854 }
1855 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001856 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001857 }
1858
1859 // Find a different thread to steal work from. Pick a random thread.
1860 // My initial plan was to cycle through all the threads, and only return
1861 // if we tried to steal from every thread, and failed. Arch says that's
1862 // not such a great idea.
1863 // GEH - need yield code in this loop for throughput library mode?
1864 new_victim:
1865 k = __kmp_get_random( thread ) % (nthreads - 1);
1866 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1867 ++k; // Adjusts random distribution to exclude self
1868 }
1869 {
1870 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1871 int first;
1872
1873 // There is a slight chance that __kmp_enable_tasking() did not wake up
1874 // all threads waiting at the barrier. If this thread is sleeping, then
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001875 // wake it up. Since we were going to pay the cache miss penalty
1876 // for referencing another thread's kmp_info_t struct anyway, the check
Jim Cownie5e8470a2013-09-27 10:38:44 +00001877 // shouldn't cost too much performance at this point.
1878 // In extra barrier mode, tasks do not sleep at the separate tasking
1879 // barrier, so this isn't a problem.
1880 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1881 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1882 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1883 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001884 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001885            // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001886 // There is a slight possibility that it resumes, steals a task from
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001887 // another thread, which spawns more tasks, all in the time that it takes
Jim Cownie5e8470a2013-09-27 10:38:44 +00001888 // this thread to check => don't write an assertion that the victim's
1889 // queue is empty. Try stealing from a different thread.
1890 goto new_victim;
1891 }
1892
1893 // Now try to steal work from the selected thread
1894 first = TRUE;
1895 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1896 thread_finished, is_constrained )) != NULL)
1897 {
1898#if USE_ITT_BUILD && USE_ITT_NOTIFY
1899 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1900 if ( itt_sync_obj == NULL ) {
1901 // we are at fork barrier where we could not get the object reliably
1902 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1903 }
1904 __kmp_itt_task_starting( itt_sync_obj );
1905 }
1906#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1907 __kmp_invoke_task( gtid, task, current_task );
1908#if USE_ITT_BUILD
1909 if ( itt_sync_obj != NULL )
1910 __kmp_itt_task_finished( itt_sync_obj );
1911#endif /* USE_ITT_BUILD */
1912
1913 // Try stealing from this victim again, in the future.
1914 if (first) {
1915 threads_data[ tid ].td.td_deque_last_stolen = k;
1916 first = FALSE;
1917 }
1918
1919 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001920 if (flag == NULL || (!final_spin && flag->done_check())) {
1921 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922 gtid) );
1923 return TRUE;
1924 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001925 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1927
1928 // If the execution of the stolen task resulted in more tasks being
1929 // placed on our run queue, then restart the whole process.
1930 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001931 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 gtid) );
1933 goto start;
1934 }
1935 }
1936
1937        // The victim's work queue is empty. If we are in the final spin loop
1938 // of the barrier, check and see if the termination condition is satisfied.
1939 // Going on and finding a new victim to steal from is expensive, as it
1940 // involves a lot of cache misses, so we definitely want to re-check the
1941 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001942#if OMP_41_ENABLED
1943 // The work queue may be empty but there might be proxy tasks still executing
1944 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1945#else
1946 if (final_spin)
1947#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001948 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949 // First, decrement the #unfinished threads, if that has not already
1950 // been done. This decrement might be to the spin location, and
1951 // result in the termination condition being satisfied.
1952 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001953 kmp_uint32 count;
1954
1955 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001956 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001957 "task_team=%p\n",
1958 gtid, count, task_team) );
1959 *thread_finished = TRUE;
1960 }
1961
1962 // If __kmp_tasking_mode != tskm_immediate_exec,
1963 // then it is now unsafe to reference thread->th.th_team !!!
1964 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1965 // thread to pass through the barrier, where it might reset each thread's
1966 // th.th_team field for the next parallel region.
1967 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968 if (flag != NULL && flag->done_check()) {
1969 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970 return TRUE;
1971 }
1972 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001973 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001974 }
1975
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001976 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001977 return FALSE;
1978}
1979
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001980int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1981 int *thread_finished
1982 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1983{
1984 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1985 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1986}
1987
1988int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1989 int *thread_finished
1990 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1991{
1992 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1993 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1994}
1995
1996int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1997 int *thread_finished
1998 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1999{
2000 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2001 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2002}
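// The three wrappers above are the concrete instantiations of
// __kmp_execute_tasks_template used elsewhere in the runtime: 32-bit flags
// (e.g. the taskwait/taskgroup counters above), 64-bit flags (barrier release
// words), and the hierarchical "oncore" flag.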
2003
2004
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005
2006//-----------------------------------------------------------------------------
2007// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2008// next barrier so they can assist in executing enqueued tasks.
2009// First thread in allocates the task team atomically.
2010
2011static void
2012__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2013{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002014 kmp_thread_data_t *threads_data;
2015 int nthreads, i, is_init_thread;
2016
2017 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2018 __kmp_gtid_from_thread( this_thr ) ) );
2019
2020 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002021 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022
2023 nthreads = task_team->tt.tt_nproc;
2024 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002025 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002026
2027 // Allocate or increase the size of threads_data if necessary
2028 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2029
2030 if (!is_init_thread) {
2031 // Some other thread already set up the array.
2032 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2033 __kmp_gtid_from_thread( this_thr ) ) );
2034 return;
2035 }
2036 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2037 KMP_DEBUG_ASSERT( threads_data != NULL );
2038
2039 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2040 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2041 {
2042 // Release any threads sleeping at the barrier, so that they can steal
2043 // tasks and execute them. In extra barrier mode, tasks do not sleep
2044 // at the separate tasking barrier, so this isn't a problem.
2045 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002046 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002047 kmp_info_t *thread = threads_data[i].td.td_thr;
2048
2049 if (i == this_thr->th.th_info.ds.ds_tid) {
2050 continue;
2051 }
2052 // Since we haven't locked the thread's suspend mutex lock at this
2053 // point, there is a small window where a thread might be putting
2054 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002055            // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056            // to see if other threads are sleeping (using the same random
2057 // mechanism that is used for task stealing) and awakens them if
2058 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002059 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060 {
2061 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2062 __kmp_gtid_from_thread( this_thr ),
2063 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002064 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065 }
2066 else {
2067 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2068 __kmp_gtid_from_thread( this_thr ),
2069 __kmp_gtid_from_thread( thread ) ) );
2070 }
2071 }
2072 }
2073
2074 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2075 __kmp_gtid_from_thread( this_thr ) ) );
2076}
2077
2078
2079/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002080/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002081 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2082 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2083 * After a child thread checks into a barrier and calls __kmp_release() from
2084 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2085 * longer assume that the kmp_team_t structure is intact (at any moment, the
2086 * master thread may exit the barrier code and free the team data structure,
2087 * and return the threads to the thread pool).
2088 *
2089 * This does not work with the tasking code, as the thread is still
2090 * expected to participate in the execution of any tasks that may have been
2091 * spawned by a member of the team, and the thread still needs access to
2092 * each thread in the team, so that it can steal work from it.
2093 *
2094 * Enter the existence of the kmp_task_team_t struct. It employs a reference
2095 * counting mechanism, and is allocated by the master thread before calling
2096 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2097 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2098 * of the kmp_task_team_t structs for consecutive barriers can overlap
2099 * (and will, unless the master thread is the last thread to exit the barrier
2100 * release phase, which is not typical).
2101 *
2102 * The existence of such a struct is useful outside the context of tasking,
2103 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2104 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2105 * libraries.
2106 *
2107 * We currently use the existence of the threads array as an indicator that
2108 * tasks were spawned since the last barrier. If the structure is to be
2109 * useful outside the context of tasking, then this will have to change, but
2110 * not settting the field minimizes the performance impact of tasking on
2111 * barriers, when no explicit tasks were spawned (pushed, actually).
2112 */
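/*
 * Illustrative timeline, assuming the two-slot scheme that __kmp_task_team_setup()
 * (later in this file) maintains, indexed by th_task_state parity:
 *
 *   parallel region N   : threads work out of team->t.t_task_team[ state ]
 *   barrier for N       : workers may keep stealing from task_team[ state ]
 *                         while task_team[ 1 - state ] is prepared for N+1
 *   parallel region N+1 : th_task_state has flipped, so task_team[ 1 - state ]
 *                         is the one in use
 *
 * This is a sketch of the lifetime overlap described above, not additional
 * machinery.
 */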
2113
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002114
Jim Cownie5e8470a2013-09-27 10:38:44 +00002115static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2116// Lock for task team data structures
2117static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2118
2119
2120//------------------------------------------------------------------------------
2121// __kmp_alloc_task_deque:
2122// Allocates a task deque for a particular thread, and initialize the necessary
2123// data structures relating to the deque. This only happens once per thread
2124// per task team since task teams are recycled.
2125// No lock is needed during allocation since each thread allocates its own
2126// deque.
2127
2128static void
2129__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2130{
2131 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2132 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2133
2134 // Initialize last stolen task field to "none"
2135 thread_data -> td.td_deque_last_stolen = -1;
2136
2137 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2138 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2139 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2140
2141 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2142 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2143 // Allocate space for task deque, and zero the deque
2144 // Cannot use __kmp_thread_calloc() because threads not around for
2145 // kmp_reap_task_team( ).
2146 thread_data -> td.td_deque = (kmp_taskdata_t **)
2147 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2148}
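// Note: the deque is sized to TASK_DEQUE_SIZE entries and indexed with
//     index = ( index + 1 ) & TASK_DEQUE_MASK;   // and the -1 analogue on pop
// so TASK_DEQUE_SIZE is presumed to be a power of two with
// TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1.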
2149
2150
2151//------------------------------------------------------------------------------
2152// __kmp_free_task_deque:
2153// Deallocates a task deque for a particular thread.
2154// Happens at library deallocation so don't need to reset all thread data fields.
2155
2156static void
2157__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2158{
2159 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2160
2161 if ( thread_data -> td.td_deque != NULL ) {
2162 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2163 __kmp_free( thread_data -> td.td_deque );
2164 thread_data -> td.td_deque = NULL;
2165 }
2166 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2167
2168#ifdef BUILD_TIED_TASK_STACK
2169 // GEH: Figure out what to do here for td_susp_tied_tasks
2170 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2171 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2172 }
2173#endif // BUILD_TIED_TASK_STACK
2174}
2175
2176
2177//------------------------------------------------------------------------------
2178// __kmp_realloc_task_threads_data:
2179// Allocates a threads_data array for a task team, either by allocating an initial
2180// array or enlarging an existing array. Only the first thread to get the lock
2181// allocs or enlarges the array and re-initializes the array eleemnts.
2182// That thread returns "TRUE", the rest return "FALSE".
2183// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2184// The current size is given by task_team -> tt.tt_max_threads.
2185
2186static int
2187__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2188{
2189 kmp_thread_data_t ** threads_data_p;
2190 kmp_int32 nthreads, maxthreads;
2191 int is_init_thread = FALSE;
2192
2193 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2194 // Already reallocated and initialized.
2195 return FALSE;
2196 }
2197
2198 threads_data_p = & task_team -> tt.tt_threads_data;
2199 nthreads = task_team -> tt.tt_nproc;
2200 maxthreads = task_team -> tt.tt_max_threads;
2201
2202 // All threads must lock when they encounter the first task of the implicit task
2203 // region to make sure threads_data fields are (re)initialized before used.
2204 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2205
2206 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2207 // first thread to enable tasking
2208 kmp_team_t *team = thread -> th.th_team;
2209 int i;
2210
2211 is_init_thread = TRUE;
2212 if ( maxthreads < nthreads ) {
2213
2214 if ( *threads_data_p != NULL ) {
2215 kmp_thread_data_t *old_data = *threads_data_p;
2216 kmp_thread_data_t *new_data = NULL;
2217
2218 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2219 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2220 __kmp_gtid_from_thread( thread ), task_team,
2221 nthreads, maxthreads ) );
2222 // Reallocate threads_data to have more elements than current array
2223 // Cannot use __kmp_thread_realloc() because threads not around for
2224 // kmp_reap_task_team( ). Note all new array entries are initialized
2225 // to zero by __kmp_allocate().
2226 new_data = (kmp_thread_data_t *)
2227 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2228 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002229 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002230 (void *) old_data,
2231 maxthreads * sizeof(kmp_taskdata_t *) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002232
2233#ifdef BUILD_TIED_TASK_STACK
2234 // GEH: Figure out if this is the right thing to do
2235 for (i = maxthreads; i < nthreads; i++) {
2236 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2237 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2238 }
2239#endif // BUILD_TIED_TASK_STACK
2240 // Install the new data and free the old data
2241 (*threads_data_p) = new_data;
2242 __kmp_free( old_data );
2243 }
2244 else {
2245 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2246 "threads data for task_team %p, size = %d\n",
2247 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2248 // Make the initial allocate for threads_data array, and zero entries
2249 // Cannot use __kmp_thread_calloc() because threads not around for
2250 // kmp_reap_task_team( ).
2251 *threads_data_p = (kmp_thread_data_t *)
2252 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2253#ifdef BUILD_TIED_TASK_STACK
2254 // GEH: Figure out if this is the right thing to do
2255 for (i = 0; i < nthreads; i++) {
2256 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2257 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2258 }
2259#endif // BUILD_TIED_TASK_STACK
2260 }
2261 task_team -> tt.tt_max_threads = nthreads;
2262 }
2263 else {
2264 // If array has (more than) enough elements, go ahead and use it
2265 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2266 }
2267
2268 // initialize threads_data pointers back to thread_info structures
2269 for (i = 0; i < nthreads; i++) {
2270 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2271 thread_data -> td.td_thr = team -> t.t_threads[i];
2272
2273 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2274 // The last stolen field survives across teams / barrier, and the number
2275 // of threads may have changed. It's possible (likely?) that a new
2276 // parallel region will exhibit the same behavior as the previous region.
2277 thread_data -> td.td_deque_last_stolen = -1;
2278 }
2279 }
2280
2281 KMP_MB();
2282 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2283 }
2284
2285 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2286 return is_init_thread;
2287}
2288
2289
2290//------------------------------------------------------------------------------
2291// __kmp_free_task_threads_data:
2292// Deallocates a threads_data array for a task team, including any attached
2293// tasking deques. Only occurs at library shutdown.
2294
2295static void
2296__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2297{
2298 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2299 if ( task_team -> tt.tt_threads_data != NULL ) {
2300 int i;
2301 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2302 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2303 }
2304 __kmp_free( task_team -> tt.tt_threads_data );
2305 task_team -> tt.tt_threads_data = NULL;
2306 }
2307 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2308}
2309
2310
2311//------------------------------------------------------------------------------
2312// __kmp_allocate_task_team:
2313// Allocates a task team associated with a specific team, taking it from
2314// the global task team free list if possible. Also initializes data structures.
2315
2316static kmp_task_team_t *
2317__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2318{
2319 kmp_task_team_t *task_team = NULL;
2320 int nthreads;
2321
2322 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2323 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2324
2325 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2326 // Take a task team from the task team pool
2327 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2328 if (__kmp_free_task_teams != NULL) {
2329 task_team = __kmp_free_task_teams;
2330 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2331 task_team -> tt.tt_next = NULL;
2332 }
2333 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2334 }
2335
2336 if (task_team == NULL) {
2337 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2338 "task team for team %p\n",
2339 __kmp_gtid_from_thread( thread ), team ) );
2340 // Allocate a new task team if one is not available.
2341 // Cannot use __kmp_thread_malloc() because threads not around for
2342 // kmp_reap_task_team( ).
2343 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2344 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2345 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2346 //task_team -> tt.tt_max_threads = 0;
2347 //task_team -> tt.tt_next = NULL;
2348 }
2349
2350 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002351#if OMP_41_ENABLED
2352 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2353#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2355
Jim Cownie5e8470a2013-09-27 10:38:44 +00002356 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2357 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002358
Jonathan Peyton54127982015-11-04 21:37:48 +00002359 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2360 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002361 return task_team;
2362}
2363
2364
2365//------------------------------------------------------------------------------
2366// __kmp_free_task_team:
2367// Frees the task team associated with a specific thread, and adds it
2368// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002369
Jonathan Peyton54127982015-11-04 21:37:48 +00002370void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002371__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2372{
2373 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2374 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2375
Jim Cownie5e8470a2013-09-27 10:38:44 +00002376 // Put task team back on free list
2377 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2378
2379 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2380 task_team -> tt.tt_next = __kmp_free_task_teams;
2381 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2382 TCW_PTR(__kmp_free_task_teams, task_team);
2383
2384 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2385}
2386
2387
2388//------------------------------------------------------------------------------
2389// __kmp_reap_task_teams:
2390// Free all the task teams on the task team free list.
2391// Should only be done during library shutdown.
2392// Cannot do anything that needs a thread structure or gtid since they are already gone.
2393
2394void
2395__kmp_reap_task_teams( void )
2396{
2397 kmp_task_team_t *task_team;
2398
2399 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2400 // Free all task_teams on the free list
2401 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2402 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2403 __kmp_free_task_teams = task_team -> tt.tt_next;
2404 task_team -> tt.tt_next = NULL;
2405
2406 // Free threads_data if necessary
2407 if ( task_team -> tt.tt_threads_data != NULL ) {
2408 __kmp_free_task_threads_data( task_team );
2409 }
2410 __kmp_free( task_team );
2411 }
2412 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2413 }
2414}
2415
Jim Cownie5e8470a2013-09-27 10:38:44 +00002416//------------------------------------------------------------------------------
2417// __kmp_wait_to_unref_task_teams:
2418// Some threads could still be in the fork barrier release code, possibly
2419// trying to steal tasks. Wait for each thread to unreference its task team.
2420//
2421void
2422__kmp_wait_to_unref_task_teams(void)
2423{
2424 kmp_info_t *thread;
2425 kmp_uint32 spins;
2426 int done;
2427
2428 KMP_INIT_YIELD( spins );
2429
2430
2431 for (;;) {
2432 done = TRUE;
2433
2434        // TODO: GEH - this may be wrong because some sync would be necessary
2435 // in case threads are added to the pool during the traversal.
2436 // Need to verify that lock for thread pool is held when calling
2437 // this routine.
2438 for (thread = (kmp_info_t *)__kmp_thread_pool;
2439 thread != NULL;
2440 thread = thread->th.th_next_pool)
2441 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002442#if KMP_OS_WINDOWS
2443 DWORD exit_val;
2444#endif
2445 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2446 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2447 __kmp_gtid_from_thread( thread ) ) );
2448 continue;
2449 }
2450#if KMP_OS_WINDOWS
2451 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2452 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002453 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002454 continue;
2455 }
2456#endif
2457
2458 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2459
2460 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2461 __kmp_gtid_from_thread( thread ) ) );
2462
2463 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002464 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002466 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002467 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2468 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002469 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002470 }
2471 }
2472 }
2473 if (done) {
2474 break;
2475 }
2476
2477 // If we are oversubscribed,
2478 // or have waited a bit (and library mode is throughput), yield.
2479 // Pause is in the following code.
2480 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2481 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2482 }
2483
2484
2485}


//------------------------------------------------------------------------------
// __kmp_task_team_setup:  Create a task_team for the current team, but use
// an already created, unused one if it already exists.
// This may be called by any thread, but only for teams with # threads > 1.
void
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
    // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
    if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
        team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
                      __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
                      ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
    }

    // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
    // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
    // previous task_team struct (above), until they receive the signal to stop checking for tasks (they can't safely
    // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
    // serialized teams.
    int other_team = 1 - this_thr->th.th_task_state;
    if (team->t.t_task_team[other_team] == NULL && team->t.t_nproc > 1) { // setup other team as well
        team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
                      __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                      ((team != NULL) ? team->t.t_id : -1), other_team ));
    }
    else { // Leave the old task team struct in place for the upcoming region; adjust as needed
        kmp_task_team_t *task_team = team->t.t_task_team[other_team];
        if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
            TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
            TCW_4(task_team->tt.tt_found_tasks, FALSE);
#if OMP_41_ENABLED
            TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#endif
            TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
            TCW_4(task_team->tt.tt_active, TRUE );
        }
        // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
                      __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                      ((team != NULL) ? team->t.t_id : -1), other_team ));
    }
}
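// Illustrative sketch (not part of the runtime): the team keeps two task_team
// slots, t_task_team[0] and t_task_team[1], and each thread's th_task_state
// (0 or 1) selects the slot it is currently working from. The setup above
// prepares the slot for the *next* parity so it is ready when threads flip
// state at the barrier. Conceptually:
//
//     int current = this_thr->th.th_task_state;   // slot in use now
//     int other   = 1 - current;                  // slot for the next region
//     // current slot: left untouched (workers may still be draining it)
//     // other slot:   allocated, or recycled and re-armed (tt_active = TRUE)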


//------------------------------------------------------------------------------
// __kmp_task_team_sync: Propagation of task team data from team to threads
// which happens just after the release phase of a team barrier.  This may be
// called by any thread, but only for teams with # threads > 1.

void
__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // Toggle the th_task_state field, to switch which task_team this thread refers to
    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
    // It is now safe to propagate the task team pointer from the team struct to the current thread.
    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
    KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
                  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
                  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}


//--------------------------------------------------------------------------------------------
// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
// phase.  Only called by master thread if #threads in team > 1 or if proxy tasks were created.
// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
// optionally as the last argument. When wait is zero, master thread does not wait for
// unfinished_threads to reach 0.
void
__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                      USE_ITT_BUILD_ARG(void * itt_sync_obj)
                      , int wait)
{
    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );

    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
        if (wait) {
            KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
                          __kmp_gtid_from_thread(this_thr), task_team));
            // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
            // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
            kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
            flag.wait(this_thr, TRUE
                      USE_ITT_BUILD_ARG(itt_sync_obj));
        }
        // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
                      "setting active to false, setting local and team's pointer to NULL\n",
                      __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_41_ENABLED
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
        TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
#else
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
#endif
        TCW_SYNC_4( task_team->tt.tt_active, FALSE );
        KMP_MB();

        TCW_PTR(this_thr->th.th_task_team, NULL);
    }
}
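// Illustrative sketch (hypothetical call sites, not taken from this file): the
// optional 'wait' argument lets a caller skip the blocking phase and only
// deactivate the task team. Assuming an itt_obj handle is available when ITT
// support is built in:
//
//     __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_obj), 1); // block until tt_unfinished_threads reaches 0
//     __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_obj), 0); // skip the wait, just mark the team inactive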


//------------------------------------------------------------------------------
// __kmp_tasking_barrier:
// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
// Internal function to execute all tasks prior to a regular barrier or a
// join barrier.  It is a full barrier itself, which unfortunately turns
// regular barriers into double barriers and join barriers into 1 1/2
// barriers.
void
__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
{
    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
    int flag = FALSE;
    KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );

#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
#endif /* USE_ITT_BUILD */
    kmp_flag_32 spin_flag(spin, 0U);
    while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
                                     USE_ITT_BUILD_ARG(NULL), 0 ) ) {
#if USE_ITT_BUILD
        // TODO: What about itt_sync_obj??
        KMP_FSYNC_SPIN_PREPARE( spin );
#endif /* USE_ITT_BUILD */

        if( TCR_4(__kmp_global.g.g_done) ) {
            if( __kmp_global.g.g_abort )
                __kmp_abort_thread( );
            break;
        }
        KMP_YIELD( TRUE );       // GH: We always yield here
    }
#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
#endif /* USE_ITT_BUILD */
}


#if OMP_41_ENABLED

/* __kmp_give_task puts a task into the queue of a given thread if:
    - the queue for that thread was created
    - there's space in that queue

   Because of this, __kmp_push_task needs to check if there's space after getting the lock
 */
static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
{
    kmp_task_team_t * task_team = thread->th.th_task_team;
    kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    bool result = false;

    KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );

    // assert tasking is enabled? what if not?
    KMP_DEBUG_ASSERT( task_team != NULL );

    if (thread_data -> td.td_deque == NULL ) {
        // There's no queue in this thread, go find another one
        // We're guaranteed that at least one thread has a queue
        KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
        return result;
    }

    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        return result;
    }

    __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );

    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        goto release_and_exit;
    }

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);

    result = true;
    KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );

release_and_exit:
    __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );

    return result;
}
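// Illustrative sketch (not part of the runtime): the per-thread deque used
// above is a fixed-size ring buffer, so the tail index wraps with a
// power-of-two mask rather than a modulo. Assuming TASK_DEQUE_SIZE is a power
// of two and TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1, the push amounts to:
//
//     deque[tail] = task;
//     tail        = (tail + 1) & TASK_DEQUE_MASK;   // e.g. with size 256: 255 wraps to 0
//     ntasks      = ntasks + 1;                     // done via TCR_4/TCW_4 for visibility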


/* The finish of a proxy task is divided in two pieces:
    - the top half is the one that can be done from a thread outside the team
    - the bottom half must be run from a thread within the team

   In order to run the bottom half the task gets queued back into one of the threads of the team.
   Once the td_incomplete_child_task counter of the parent is decremented the threads can leave the barriers.
   So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
    - things that can be run before queuing the bottom half
    - things that must be run after queuing the bottom half

   This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
   we use the td_incomplete_child_task of the proxy task to synchronize the top and bottom half.
*/

static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    taskdata -> td_flags.complete = 1;   // mark the task as completed

    if ( taskdata->td_taskgroup )
       KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );

    // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
    TCR_4(taskdata->td_incomplete_child_tasks++);
}

static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    kmp_int32 children = 0;

    // Predecrement simulated by "- 1" calculation
    children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Remove the imaginary child
    TCR_4(taskdata->td_incomplete_child_tasks--);
}

static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 );   // top half must run before bottom half

    // We need to wait to make sure the top half is finished
    // Spinning here should be ok as this should happen quickly
    while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;

    __kmp_release_deps(gtid,taskdata);
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}

/*!
@ingroup TASKING
@param gtid Global Thread ID of encountering thread
@param ptask Task whose execution is completed

Execute the completion of a proxy task from a thread that is part of the team. Runs the top and bottom halves directly.
*/
void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);
    __kmp_second_top_half_finish_proxy(taskdata);
    __kmp_bottom_half_finish_proxy(gtid,ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
}
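// Illustrative usage sketch (hypothetical, not taken from this file): a thread
// that belongs to the team and has observed that the asynchronous work backing
// a proxy task finished could complete it directly, assuming __kmp_get_gtid()
// yields the caller's global thread id and 'ptask' was saved at task creation:
//
//     kmp_task_t *ptask = /* proxy task captured when it was created */;
//     __kmpc_proxy_task_completed(__kmp_get_gtid(), ptask);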

/*!
@ingroup TASKING
@param ptask Task whose execution is completed

Execute the completion of a proxy task from a thread that may not be part of the team.
*/
void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);

    // Enqueue task to complete bottom half completion from a thread within the corresponding team
    kmp_team_t * team = taskdata->td_team;
    kmp_int32 nthreads = team->t.t_nproc;
    kmp_info_t *thread;
    kmp_int32 k = 0;

    do {
        // This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
        // For now we're just linearly trying to find a thread
        k = (k+1) % nthreads;
        thread = team->t.t_threads[k];
    } while ( !__kmp_give_task( thread, k, ptask ) );

    __kmp_second_top_half_finish_proxy(taskdata);

    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
}
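// Illustrative usage sketch (hypothetical, not taken from this file): a
// completion callback running on a thread outside the team, for example a
// device driver thread, would use this out-of-order entry point since it
// cannot run the bottom half itself:
//
//     void my_async_done_callback(void *arg) {        // hypothetical callback
//         kmp_task_t *ptask = (kmp_task_t *)arg;      // proxy task saved at creation
//         __kmpc_proxy_task_completed_ooo(ptask);     // bottom half is queued to the team
//     }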

#endif