/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif


/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
53//
54// gtid: global thread identifier for thread containing stack
55// thread_data: thread data for task team thread containing stack
56// threshold: value above which the trace statement triggers
57// location: string identifying call site of this function (for trace)
58
59static void
60__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
61{
62 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
63 kmp_taskdata_t **stack_top = task_stack -> ts_top;
64 kmp_int32 entries = task_stack -> ts_entries;
65 kmp_taskdata_t *tied_task;
66
67 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
68 "first_block = %p, stack_top = %p \n",
69 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
70
71 KMP_DEBUG_ASSERT( stack_top != NULL );
72 KMP_DEBUG_ASSERT( entries > 0 );
73
74 while ( entries != 0 )
75 {
76 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
77 // fix up ts_top if we need to pop from previous block
        if ( (entries & TASK_STACK_INDEX_MASK) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( __kmp_threads[ gtid ], stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
             (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {    // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
              __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top  = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block  -> sb_prev = stack_block;
            new_block  -> sb_next = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
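
// Illustration (comment only; sizes are hypothetical): the suspended-tied-task stack above is a
// linked list of fixed-size blocks, each holding TASK_STACK_BLOCK_SIZE kmp_taskdata_t pointers.
// With a block size of 4, pushing five tied tasks A..E would leave:
//
//     ts_first_block: [A][B][C][D]  --sb_next-->  second block: [E][.][.][.]
//
// with ts_top pointing at the free slot after E and ts_entries == 5.  The
// "(ts_entries & TASK_STACK_INDEX_MASK) == 0" tests in the push and pop paths fire exactly at
// these block boundaries, which is the only time a block must be linked in or ts_top moved back
// to the previous block.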

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
234 kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
//  __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
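
// Illustration (comment only; sizes are hypothetical): each thread owns a fixed-size circular
// deque of taskdata pointers.  The owner pushes at td_deque_tail, while other threads steal from
// td_deque_head (the stealing side lives later in this file).  Because TASK_DEQUE_SIZE is a power
// of two, wrapping is done with a mask rather than a modulo; e.g. with a deque of 8 entries
// (mask 0x7):
//
//     tail = 7;
//     tail = ( tail + 1 ) & 0x7;   // tail wraps back to 0
//
// td_deque_ntasks is the occupancy count other threads look at, which is why it is accessed with
// TCR_4/TCW_4 and only updated while td_deque_lock is held.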


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if tasking serialized
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
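
// Illustration (comment only; assumes a parallel, non-serialized team): td_allocated_child_tasks
// acts as a reference count that keeps a taskdata block alive while a child may still point at
// it.  A rough lifecycle for an explicit parent task with one child:
//
//     parent allocated (__kmp_task_alloc):             parent count = 1   (counts the task itself)
//     child allocated:                                 parent count = 2
//     parent finishes (__kmp_free_task_and_ancestors): parent count = 1, parent kept
//     child finishes and is freed:                     parent count = 0, parent freed too
//
// The "- 1" after KMP_TEST_THEN_DEC32 converts the old value returned by the fetch-and-decrement
// into the new value that is compared against zero.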

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
    // Otherwise, if a task is executed immediately from the release_deps code
    // the flag will be reset to 1 again by this same function
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // FIXME johnmc: I think this statement should be before the last one so if an
    // asynchronous inquiry peers into the runtime system it doesn't see the freed
    // task as the current task
    __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}
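
// Illustration (comment only): __kmp_task_finish updates two different counters on the parent.
// td_incomplete_child_tasks (decremented above) is what __kmpc_omp_taskwait spins on, so it must
// drop as soon as the child's user code has run; td_allocated_child_tasks (handled in
// __kmp_free_task_and_ancestors) only controls when the taskdata memory can be recycled.
// Roughly:
//
//     child completes -> parent->td_incomplete_child_tasks--   // unblocks taskwait
//     child is freed  -> parent->td_allocated_child_tasks--    // allows the parent to be freed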

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task.  Since the serial task is initialized before
//   ompt_initialize is called, we cannot know at that point whether OMPT will be used.
//   This function provides the support needed to initialize OMPT for the serial task
//   after the fact.

void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
{
    task->ompt_task_info.task_id = __ompt_task_id_new(tid);
    task->ompt_task_info.function = NULL;
    task->ompt_task_info.frame.exit_runtime_frame = NULL;
    task->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness    = TASK_TIED;
    task->td_flags.tasktype    = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy       = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started     = 1;
    task->td_flags.executing   = 1;
    task->td_flags.complete    = 0;
    task->td_flags.freed       = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;  // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks  == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a multiple of val, where val is a power of two
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val
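
// Illustration (comment only; assumes a 64-bit build where sizeof(void *) == 8):
//
//     __kmp_round_up_to_val( 40, 8 ) == 40   // already a multiple of 8, unchanged
//     __kmp_round_up_to_val( 44, 8 ) == 48   // 44 & 7 != 0, so rounded up to the next multiple
//
// __kmp_task_alloc below uses this to pad sizeof(kmp_taskdata_t) + sizeof_kmp_task_t so that the
// shareds block placed after it stays pointer-aligned.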


//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;  // AC: Always start with 0 part id

    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
    taskdata->td_level = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_ident = loc_ref;
    taskdata->td_taskwait_ident = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness = flags->tiedness;
    taskdata->td_flags.final = flags->final;
    taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy = flags->proxy;
#endif
    taskdata->td_flags.tasktype = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.complete = 0;
    taskdata->td_flags.freed = 0;

    taskdata->td_flags.native = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
        taskdata->ompt_task_info.function = (void*) task_entry;
        taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
        taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif

    return task;
}
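
// Illustration (comment only): the single allocation made above is laid out roughly as
//
//     +----------------+---------------------------+---------+--------------------+
//     | kmp_taskdata_t | kmp_task_t + private vars | padding | shareds (pointers) |
//     +----------------+---------------------------+---------+--------------------+
//     ^ taskdata        ^ task = KMP_TASKDATA_TO_TASK(taskdata)
//
// with task->shareds placed at taskdata + shareds_offset, which is rounded up so the shareds area
// stays pointer-aligned.  KMP_TASK_TO_TASKDATA and KMP_TASKDATA_TO_TASK are therefore just pointer
// arithmetic between the first two pieces.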


kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}
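
// Illustration (comment only; names and the exact flag encoding are hypothetical): a compiler
// lowering "#pragma omp task" typically pairs this entry point with __kmpc_omp_task below, along
// the lines of:
//
//     kmp_int32 task_entry( kmp_int32 gtid, kmp_task_t *t );   // outlined task body
//
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /* flags: tied */ 1,
//                                            sizeof_kmp_task_t, sizeof_shareds,
//                                            (kmp_routine_entry_t)&task_entry );
//     /* ... copy firstprivate data into the task block, fill in t->shareds ... */
//     __kmpc_omp_task( &loc, gtid, t );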

//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );

#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
    __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_status & ompt_status_track) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
        KMP_COUNT_BLOCK(TASK_executed);
        KMP_TIME_BLOCK (TASK_execution);
#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED


#if OMPT_SUPPORT
    if (ompt_status & ompt_status_track) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_finish( gtid, task, current_task );

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}
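
// Illustration (comment only): the two call forms above differ only in the thunk signature the
// compiler produced:
//
//     native (GOMP-compatible) thunk:  void      body( void *shareds );                 // gets task->shareds
//     KMP thunk (kmp_routine_entry_t): kmp_int32 body( kmp_int32 gtid, kmp_task_t *t );
//
// td_flags.native records which convention was chosen at allocation time; __kmpc_omp_task_alloc
// above always clears it, so it is expected to be set only by the GOMP compatibility entry points.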
1185
1186//-----------------------------------------------------------------------
1187// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1188//
1189// loc_ref: location of original task pragma (ignored)
1190// gtid: Global Thread ID of encountering thread
1191// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1192// Returns:
1193// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1194// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1195
1196kmp_int32
1197__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1198{
1199 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1200
1201 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1202 gtid, loc_ref, new_taskdata ) );
1203
1204 /* Should we execute the new task or queue it? For now, let's just always try to
1205 queue it. If the queue fills up, then we'll execute it. */
1206
1207 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1208 { // Execute this task immediately
1209 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1210 new_taskdata->td_flags.task_serial = 1;
1211 __kmp_invoke_task( gtid, new_task, current_task );
1212 }
1213
1214 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1215 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1216 new_taskdata ) );
1217
1218 return TASK_CURRENT_NOT_QUEUED;
1219}
1220
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001221//---------------------------------------------------------------------
1222// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1223// gtid: Global Thread ID of encountering thread
1224// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1225// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1226// returns:
1227//
1228// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1229// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1230kmp_int32
1231__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1232{
1233 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1234
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001235#if OMPT_SUPPORT
1236 if (ompt_status & ompt_status_track) {
1237 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1238 __builtin_frame_address(0);
1239 }
1240#endif
1241
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001242 /* Should we execute the new task or queue it? For now, let's just always try to
1243 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001244#if OMP_41_ENABLED
1245 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1246#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001248#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001249 { // Execute this task immediately
1250 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1251 if ( serialize_immediate )
1252 new_taskdata -> td_flags.task_serial = 1;
1253 __kmp_invoke_task( gtid, new_task, current_task );
1254 }
1255
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001256#if OMPT_SUPPORT
1257 if (ompt_status & ompt_status_track) {
1258 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1259 }
1260#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001261
1262 return TASK_CURRENT_NOT_QUEUED;
1263}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001264
1265//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001266// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1267// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001268// loc_ref: location of original task pragma (ignored)
1269// gtid: Global Thread ID of encountering thread
1270// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1271// returns:
1272//
1273// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1274// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1275
1276kmp_int32
1277__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1278{
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001279 kmp_taskdata_t * new_taskdata;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001280 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001281
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001282 new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001283 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1284 gtid, loc_ref, new_taskdata ) );
1285
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001286 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001287
1288 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1289 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001290 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001291}
1292
Jim Cownie5e8470a2013-09-27 10:38:44 +00001293//-------------------------------------------------------------------------------------
1294// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1295
1296kmp_int32
1297__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1298{
1299 kmp_taskdata_t * taskdata;
1300 kmp_info_t * thread;
1301 int thread_finished = FALSE;
1302
1303 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1304 gtid, loc_ref) );
1305
1306 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1307 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1308
1309 thread = __kmp_threads[ gtid ];
1310 taskdata = thread -> th.th_current_task;
1311#if USE_ITT_BUILD
1312 // Note: These values are used by ITT events as well.
1313#endif /* USE_ITT_BUILD */
1314 taskdata->td_taskwait_counter += 1;
1315 taskdata->td_taskwait_ident = loc_ref;
1316 taskdata->td_taskwait_thread = gtid + 1;
1317
1318#if USE_ITT_BUILD
1319 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1320 if ( itt_sync_obj != NULL )
1321 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1322#endif /* USE_ITT_BUILD */
1323
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001324#if OMP_41_ENABLED
1325 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1326#else
1327 if ( ! taskdata->td_flags.team_serial )
1328#endif
1329 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001330 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001331 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001332 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001333 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1334 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001335 }
1336 }
1337#if USE_ITT_BUILD
1338 if ( itt_sync_obj != NULL )
1339 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1340#endif /* USE_ITT_BUILD */
1341
1342 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1343 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1344 }
1345
1346 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1347 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1348
1349 return TASK_CURRENT_NOT_QUEUED;
1350}
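// Lowering note (illustrative): "#pragma omp taskwait" is expected to compile
// down to a single call such as __kmpc_omp_taskwait( &loc, gtid ); the
// encountering thread then helps execute queued tasks until its own
// td_incomplete_child_tasks count drops to zero, as implemented above.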
1351
1352
1353//-------------------------------------------------
1354// __kmpc_omp_taskyield: switch to a different task
1355
1356kmp_int32
1357__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1358{
1359 kmp_taskdata_t * taskdata;
1360 kmp_info_t * thread;
1361 int thread_finished = FALSE;
1362
Jonathan Peyton45be4502015-08-11 21:36:41 +00001363 KMP_COUNT_BLOCK(OMP_TASKYIELD);
1364
Jim Cownie5e8470a2013-09-27 10:38:44 +00001365 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1366 gtid, loc_ref, end_part) );
1367
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001368 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001369 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1370
1371 thread = __kmp_threads[ gtid ];
1372 taskdata = thread -> th.th_current_task;
1373 // Should we model this as a task wait or not?
1374#if USE_ITT_BUILD
1375 // Note: These values are used by ITT events as well.
1376#endif /* USE_ITT_BUILD */
1377 taskdata->td_taskwait_counter += 1;
1378 taskdata->td_taskwait_ident = loc_ref;
1379 taskdata->td_taskwait_thread = gtid + 1;
1380
1381#if USE_ITT_BUILD
1382 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1383 if ( itt_sync_obj != NULL )
1384 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1385#endif /* USE_ITT_BUILD */
1386 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001387 kmp_task_team_t * task_team = thread->th.th_task_team;
1388 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001389 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001390 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1391 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1392 }
1393 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001394 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001395#if USE_ITT_BUILD
1396 if ( itt_sync_obj != NULL )
1397 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1398#endif /* USE_ITT_BUILD */
1399
1400 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1401 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1402 }
1403
1404 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1405 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1406
1407 return TASK_CURRENT_NOT_QUEUED;
1408}
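// Lowering note (illustrative): "#pragma omp taskyield" maps to
// __kmpc_omp_taskyield( &loc, gtid, end_part ). end_part is only recorded in the
// trace above; the runtime simply tries to run a single queued task
// (flag == NULL in __kmp_execute_tasks_32) and then resumes the yielding task.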
1409
1410
1411#if OMP_40_ENABLED
1412//-------------------------------------------------------------------------------------
1413// __kmpc_taskgroup: Start a new taskgroup
1414
1415void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001416__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001417{
1418 kmp_info_t * thread = __kmp_threads[ gtid ];
1419 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1420 kmp_taskgroup_t * tg_new =
1421 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1422 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1423 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001424 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001425 tg_new->parent = taskdata->td_taskgroup;
1426 taskdata->td_taskgroup = tg_new;
1427}
1428
1429
1430//-------------------------------------------------------------------------------------
1431// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1432// and its descendants are complete
1433
1434void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001435__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001436{
1437 kmp_info_t * thread = __kmp_threads[ gtid ];
1438 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1439 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1440 int thread_finished = FALSE;
1441
1442 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1443 KMP_DEBUG_ASSERT( taskgroup != NULL );
1444
1445 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1446#if USE_ITT_BUILD
1447 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1448 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1449 if ( itt_sync_obj != NULL )
1450 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1451#endif /* USE_ITT_BUILD */
1452
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001453#if OMP_41_ENABLED
1454 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1455#else
1456 if ( ! taskdata->td_flags.team_serial )
1457#endif
1458 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001459 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001460 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001461 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1462 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001463 }
1464 }
1465
1466#if USE_ITT_BUILD
1467 if ( itt_sync_obj != NULL )
1468 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1469#endif /* USE_ITT_BUILD */
1470 }
1471 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1472
1473 // Restore parent taskgroup for the current task
1474 taskdata->td_taskgroup = taskgroup->parent;
1475 __kmp_thread_free( thread, taskgroup );
1476
1477 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1478}
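// Lowering sketch (illustrative): a "#pragma omp taskgroup" region brackets its
// body with these two entry points, e.g.
//     __kmpc_taskgroup( &loc, gtid );
//     /* ... body: tasks created here are accounted in taskgroup->count ... */
//     __kmpc_end_taskgroup( &loc, gtid );   // returns once count reaches 0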
1479#endif
1480
1481
1482//------------------------------------------------------
1483// __kmp_remove_my_task: remove a task from my own deque
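// The owning thread pops from the tail of its deque (LIFO), while thieves in
// __kmp_steal_task() take from the head, so the owner keeps working on the most
// recently pushed tasks and older tasks migrate to other threads.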
1484
1485static kmp_task_t *
1486__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1487 kmp_int32 is_constrained )
1488{
1489 kmp_task_t * task;
1490 kmp_taskdata_t * taskdata;
1491 kmp_thread_data_t *thread_data;
1492 kmp_uint32 tail;
1493
1494 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1495 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1496
1497 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1498
1499 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1500 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1501 thread_data->td.td_deque_tail) );
1502
1503 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1504 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1505 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1506 thread_data->td.td_deque_tail) );
1507 return NULL;
1508 }
1509
1510 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1511
1512 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1513 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1514 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1515 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1516 thread_data->td.td_deque_tail) );
1517 return NULL;
1518 }
1519
1520 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1521 taskdata = thread_data -> td.td_deque[ tail ];
1522
1523 if (is_constrained) {
1524 // we need to check if the candidate obeys task scheduling constraint:
1525 // only child of current task can be scheduled
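        // Worked example: if the current task sits at td_level == 2, the loop below
        // walks the candidate's ancestors while their td_level > 2; it stops either
        // at the current task (candidate is a descendant -> OK to run) or at an
        // unrelated ancestor with td_level <= 2 (constraint violated -> leave it).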
1526 kmp_taskdata_t * current = thread->th.th_current_task;
1527 kmp_int32 level = current->td_level;
1528 kmp_taskdata_t * parent = taskdata->td_parent;
1529 while ( parent != current && parent->td_level > level ) {
1530 parent = parent->td_parent; // check generation up to the level of the current task
1531 KMP_DEBUG_ASSERT(parent != NULL);
1532 }
1533 if ( parent != current ) {
1534        // If the tail task is not a child, then no other children can appear in the deque.
1535 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1536 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1537 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1538 thread_data->td.td_deque_tail) );
1539 return NULL;
1540 }
1541 }
1542
1543 thread_data -> td.td_deque_tail = tail;
1544 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1545
1546 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1547
1548 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1549 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1550 thread_data->td.td_deque_tail) );
1551
1552 task = KMP_TASKDATA_TO_TASK( taskdata );
1553 return task;
1554}
1555
1556
1557//-----------------------------------------------------------
1558// __kmp_steal_task: remove a task from another thread's deque
1559// Assume that calling thread has already checked existence of
1560// task_team thread_data before calling this routine.
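// In the unconstrained case the thief dequeues from the head of the victim's
// deque (the oldest entry). Under the task scheduling constraint it inspects the
// tail instead, because the descendant check only needs to be applied to the
// most recently pushed task (see the comment in the constrained branch below).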
1561
1562static kmp_task_t *
1563__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1564 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1565 kmp_int32 is_constrained )
1566{
1567 kmp_task_t * task;
1568 kmp_taskdata_t * taskdata;
1569 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001570 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001571
1572 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1573
1574 threads_data = task_team -> tt.tt_threads_data;
1575 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1576
1577 victim_tid = victim->th.th_info.ds.ds_tid;
1578 victim_td = & threads_data[ victim_tid ];
1579
1580 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1581 "head=%u tail=%u\n",
1582 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1583 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1584
1585 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1586 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1587 {
1588 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1589 "ntasks=%d head=%u tail=%u\n",
1590 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1591 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1592 return NULL;
1593 }
1594
1595 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1596
1597 // Check again after we acquire the lock
1598 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1599 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1600 {
1601 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1602 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1603 "ntasks=%d head=%u tail=%u\n",
1604 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1605 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1606 return NULL;
1607 }
1608
1609 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1610
1611 if ( !is_constrained ) {
1612 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1613 // Bump head pointer and Wrap.
1614 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1615 } else {
1616 // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1617 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1618 taskdata = victim_td -> td.td_deque[ tail ];
1619 // we need to check if the candidate obeys task scheduling constraint:
1620 // only child of current task can be scheduled
1621 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1622 kmp_int32 level = current->td_level;
1623 kmp_taskdata_t * parent = taskdata->td_parent;
1624 while ( parent != current && parent->td_level > level ) {
1625 parent = parent->td_parent; // check generation up to the level of the current task
1626 KMP_DEBUG_ASSERT(parent != NULL);
1627 }
1628 if ( parent != current ) {
1629            // If the tail task is not a child, then no other children can appear in the deque (?).
1630 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1631 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1632 "ntasks=%d head=%u tail=%u\n",
1633 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1634 task_team, victim_td->td.td_deque_ntasks,
1635 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1636 return NULL;
1637 }
1638 victim_td -> td.td_deque_tail = tail;
1639 }
1640 if (*thread_finished) {
1641 // We need to un-mark this victim as a finished victim. This must be done before
1642 // releasing the lock, or else other threads (starting with the master victim)
1643 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001644 kmp_uint32 count;
1645
1646 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001647
1648 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1649 gtid, count + 1, task_team) );
1650
1651 *thread_finished = FALSE;
1652 }
1653 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1654
1655 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1656
Jonathan Peyton45be4502015-08-11 21:36:41 +00001657 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001658 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001659 "ntasks=%d head=%u tail=%u\n",
1660 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1661 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1662 victim_td->td.td_deque_tail) );
1663
1664 task = KMP_TASKDATA_TO_TASK( taskdata );
1665 return task;
1666}
1667
1668
1669//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001670// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001671// is satisfied (return true) or there are none left (return false).
1672// final_spin is TRUE if this is the spin at the release barrier.
1673// thread_finished indicates whether the thread is finished executing all
1674// the tasks it has on its deque, and is at the release barrier.
1675// flag encapsulates both the location to spin on and the value that terminates the spin.
1676// flag == NULL means only execute a single task and return.
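// C is one of the flag classes from kmp_wait_release.h (kmp_flag_32, kmp_flag_64,
// kmp_flag_oncore); the thin __kmp_execute_tasks_32/_64/_oncore wrappers further
// below instantiate this template once per flag type.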
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001678template <class C>
1679static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1680 int *thread_finished
1681 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001682{
1683 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001684 kmp_thread_data_t * threads_data;
1685 kmp_task_t * task;
1686 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1687 volatile kmp_uint32 * unfinished_threads;
1688 kmp_int32 nthreads, last_stolen, k, tid;
1689
1690 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1691 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1692
1693 task_team = thread -> th.th_task_team;
1694 KMP_DEBUG_ASSERT( task_team != NULL );
1695
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001696 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001697 gtid, final_spin, *thread_finished) );
1698
1699 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1700 KMP_DEBUG_ASSERT( threads_data != NULL );
1701
1702 nthreads = task_team -> tt.tt_nproc;
1703 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001704#if OMP_41_ENABLED
1705 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1706#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001708#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1710
1711 // Choose tasks from our own work queue.
1712 start:
1713 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1714#if USE_ITT_BUILD && USE_ITT_NOTIFY
1715 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1716 if ( itt_sync_obj == NULL ) {
1717 // we are at fork barrier where we could not get the object reliably
1718 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1719 }
1720 __kmp_itt_task_starting( itt_sync_obj );
1721 }
1722#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1723 __kmp_invoke_task( gtid, task, current_task );
1724#if USE_ITT_BUILD
1725 if ( itt_sync_obj != NULL )
1726 __kmp_itt_task_finished( itt_sync_obj );
1727#endif /* USE_ITT_BUILD */
1728
1729 // If this thread is only partway through the barrier and the condition
1730 // is met, then return now, so that the barrier gather/release pattern can proceed.
1731 // If this thread is in the last spin loop in the barrier, waiting to be
1732 // released, we know that the termination condition will not be satisified,
1733    // released, we know that the termination condition will not be satisfied,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001734 if (flag == NULL || (!final_spin && flag->done_check())) {
1735 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001736 return TRUE;
1737 }
1738 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1739 }
1740
1741 // This thread's work queue is empty. If we are in the final spin loop
1742 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001743#if OMP_41_ENABLED
1744 // The work queue may be empty but there might be proxy tasks still executing
1745 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1746#else
1747 if (final_spin)
1748#endif
1749 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001750 // First, decrement the #unfinished threads, if that has not already
1751 // been done. This decrement might be to the spin location, and
1752 // result in the termination condition being satisfied.
1753 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001754 kmp_uint32 count;
1755
1756 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001757 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001758 gtid, count, task_team) );
1759 *thread_finished = TRUE;
1760 }
1761
1762 // It is now unsafe to reference thread->th.th_team !!!
1763 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1764 // thread to pass through the barrier, where it might reset each thread's
1765 // th.th_team field for the next parallel region.
1766 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001767 if (flag != NULL && flag->done_check()) {
1768 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001769 return TRUE;
1770 }
1771 }
1772
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001773#if OMP_41_ENABLED
1774 // check if there are other threads to steal from, otherwise go back
1775 if ( nthreads == 1 )
1776 goto start;
1777#endif
1778
Jim Cownie5e8470a2013-09-27 10:38:44 +00001779 // Try to steal from the last place I stole from successfully.
1780 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1781 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1782
1783 if (last_stolen != -1) {
1784 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1785
1786 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1787 thread_finished, is_constrained )) != NULL)
1788 {
1789#if USE_ITT_BUILD && USE_ITT_NOTIFY
1790 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1791 if ( itt_sync_obj == NULL ) {
1792 // we are at fork barrier where we could not get the object reliably
1793 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1794 }
1795 __kmp_itt_task_starting( itt_sync_obj );
1796 }
1797#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1798 __kmp_invoke_task( gtid, task, current_task );
1799#if USE_ITT_BUILD
1800 if ( itt_sync_obj != NULL )
1801 __kmp_itt_task_finished( itt_sync_obj );
1802#endif /* USE_ITT_BUILD */
1803
1804 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001805 if (flag == NULL || (!final_spin && flag->done_check())) {
1806 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001807 gtid) );
1808 return TRUE;
1809 }
1810
1811 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1812 // If the execution of the stolen task resulted in more tasks being
1813 // placed on our run queue, then restart the whole process.
1814 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001815 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816 gtid) );
1817 goto start;
1818 }
1819 }
1820
1821 // Don't give priority to stealing from this thread anymore.
1822 threads_data[ tid ].td.td_deque_last_stolen = -1;
1823
1824        // The victim's work queue is empty.  If we are in the final spin loop
1825 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001826#if OMP_41_ENABLED
1827 // The work queue may be empty but there might be proxy tasks still executing
1828 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1829#else
1830 if (final_spin)
1831#endif
1832 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001833 // First, decrement the #unfinished threads, if that has not already
1834 // been done. This decrement might be to the spin location, and
1835 // result in the termination condition being satisfied.
1836 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001837 kmp_uint32 count;
1838
1839 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001840 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001841 "task_team=%p\n", gtid, count, task_team) );
1842 *thread_finished = TRUE;
1843 }
1844
1845 // If __kmp_tasking_mode != tskm_immediate_exec
1846 // then it is now unsafe to reference thread->th.th_team !!!
1847 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1848 // thread to pass through the barrier, where it might reset each thread's
1849 // th.th_team field for the next parallel region.
1850 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001851 if (flag != NULL && flag->done_check()) {
1852 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001853 gtid) );
1854 return TRUE;
1855 }
1856 }
1857 }
1858
1859 // Find a different thread to steal work from. Pick a random thread.
1860 // My initial plan was to cycle through all the threads, and only return
1861 // if we tried to steal from every thread, and failed. Arch says that's
1862 // not such a great idea.
1863 // GEH - need yield code in this loop for throughput library mode?
1864 new_victim:
1865 k = __kmp_get_random( thread ) % (nthreads - 1);
1866 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1867 ++k; // Adjusts random distribution to exclude self
1868 }
1869 {
1870 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1871 int first;
1872
1873 // There is a slight chance that __kmp_enable_tasking() did not wake up
1874 // all threads waiting at the barrier. If this thread is sleeping, then
1875        // wake it up. Since we were going to pay the cache miss penalty
1876        // for referencing another thread's kmp_info_t struct anyway, the check
1877 // shouldn't cost too much performance at this point.
1878 // In extra barrier mode, tasks do not sleep at the separate tasking
1879 // barrier, so this isn't a problem.
1880 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1881 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1882 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1883 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001884 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001885        // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001886 // There is a slight possibility that it resumes, steals a task from
Jim Cownie5e8470a2013-09-27 10:38:44 +00001887        // another thread, which spawns more tasks, all in the time that it takes
1888 // this thread to check => don't write an assertion that the victim's
1889 // queue is empty. Try stealing from a different thread.
1890 goto new_victim;
1891 }
1892
1893 // Now try to steal work from the selected thread
1894 first = TRUE;
1895 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1896 thread_finished, is_constrained )) != NULL)
1897 {
1898#if USE_ITT_BUILD && USE_ITT_NOTIFY
1899 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1900 if ( itt_sync_obj == NULL ) {
1901 // we are at fork barrier where we could not get the object reliably
1902 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1903 }
1904 __kmp_itt_task_starting( itt_sync_obj );
1905 }
1906#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1907 __kmp_invoke_task( gtid, task, current_task );
1908#if USE_ITT_BUILD
1909 if ( itt_sync_obj != NULL )
1910 __kmp_itt_task_finished( itt_sync_obj );
1911#endif /* USE_ITT_BUILD */
1912
1913 // Try stealing from this victim again, in the future.
1914 if (first) {
1915 threads_data[ tid ].td.td_deque_last_stolen = k;
1916 first = FALSE;
1917 }
1918
1919 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001920 if (flag == NULL || (!final_spin && flag->done_check())) {
1921 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922 gtid) );
1923 return TRUE;
1924 }
1925 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1926
1927 // If the execution of the stolen task resulted in more tasks being
1928 // placed on our run queue, then restart the whole process.
1929 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001930 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001931 gtid) );
1932 goto start;
1933 }
1934 }
1935
1936        // The victim's work queue is empty.  If we are in the final spin loop
1937 // of the barrier, check and see if the termination condition is satisfied.
1938 // Going on and finding a new victim to steal from is expensive, as it
1939 // involves a lot of cache misses, so we definitely want to re-check the
1940 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001941#if OMP_41_ENABLED
1942 // The work queue may be empty but there might be proxy tasks still executing
1943 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1944#else
1945 if (final_spin)
1946#endif
1947 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948 // First, decrement the #unfinished threads, if that has not already
1949 // been done. This decrement might be to the spin location, and
1950 // result in the termination condition being satisfied.
1951 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001952 kmp_uint32 count;
1953
1954 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001955 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001956 "task_team=%p\n",
1957 gtid, count, task_team) );
1958 *thread_finished = TRUE;
1959 }
1960
1961 // If __kmp_tasking_mode != tskm_immediate_exec,
1962 // then it is now unsafe to reference thread->th.th_team !!!
1963 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1964 // thread to pass through the barrier, where it might reset each thread's
1965 // th.th_team field for the next parallel region.
1966 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001967 if (flag != NULL && flag->done_check()) {
1968 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969 return TRUE;
1970 }
1971 }
1972 }
1973
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001974 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975 return FALSE;
1976}
1977
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001978int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1979 int *thread_finished
1980 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1981{
1982 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1983 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1984}
1985
1986int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1987 int *thread_finished
1988 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1989{
1990 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1991 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1992}
1993
1994int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1995 int *thread_finished
1996 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1997{
1998 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1999 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2000}
2001
2002
Jim Cownie5e8470a2013-09-27 10:38:44 +00002003
2004//-----------------------------------------------------------------------------
2005// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2006// next barrier so they can assist in executing enqueued tasks.
2007// First thread in allocates the task team atomically.
2008
2009static void
2010__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2011{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012 kmp_thread_data_t *threads_data;
2013 int nthreads, i, is_init_thread;
2014
2015 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2016 __kmp_gtid_from_thread( this_thr ) ) );
2017
2018 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002019 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020
2021 nthreads = task_team->tt.tt_nproc;
2022 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002023 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002024
2025 // Allocate or increase the size of threads_data if necessary
2026 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2027
2028 if (!is_init_thread) {
2029 // Some other thread already set up the array.
2030 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2031 __kmp_gtid_from_thread( this_thr ) ) );
2032 return;
2033 }
2034 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2035 KMP_DEBUG_ASSERT( threads_data != NULL );
2036
2037 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2038 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2039 {
2040 // Release any threads sleeping at the barrier, so that they can steal
2041 // tasks and execute them. In extra barrier mode, tasks do not sleep
2042 // at the separate tasking barrier, so this isn't a problem.
2043 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002045 kmp_info_t *thread = threads_data[i].td.td_thr;
2046
2047 if (i == this_thr->th.th_info.ds.ds_tid) {
2048 continue;
2049 }
2050 // Since we haven't locked the thread's suspend mutex lock at this
2051 // point, there is a small window where a thread might be putting
2052 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002053        // To work around this, __kmp_execute_tasks_template() periodically checks to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002054 // see if other threads are sleeping (using the same random
2055 // mechanism that is used for task stealing) and awakens them if
2056 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002057 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002058 {
2059 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2060 __kmp_gtid_from_thread( this_thr ),
2061 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002062 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002063 }
2064 else {
2065 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2066 __kmp_gtid_from_thread( this_thr ),
2067 __kmp_gtid_from_thread( thread ) ) );
2068 }
2069 }
2070 }
2071
2072 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2073 __kmp_gtid_from_thread( this_thr ) ) );
2074}
2075
2076
2077/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002078/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002079 * Utility routines for "task teams". A task team (kmp_task_t) is kind of
2080 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2081 * After a child thread checks into a barrier and calls __kmp_release() from
2082 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2083 * longer assume that the kmp_team_t structure is intact (at any moment, the
2084 * master thread may exit the barrier code and free the team data structure,
2085 * and return the threads to the thread pool).
2086 *
2087 * This does not work with the tasking code, as the thread is still
2088 * expected to participate in the execution of any tasks that may have been
2089 * spawned by a member of the team, and the thread still needs access to
2090 * each thread in the team, so that it can steal work from any of them.
2091 *
2092 * Enter the existence of the kmp_task_team_t struct. It employs a reference
2093 * counting mechanism, and is allocated by the master thread before calling
2094 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2095 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2096 * of the kmp_task_team_t structs for consecutive barriers can overlap
2097 * (and will, unless the master thread is the last thread to exit the barrier
2098 * release phase, which is not typical).
2099 *
2100 * The existence of such a struct is useful outside the context of tasking,
2101 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2102 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2103 * libraries.
2104 *
2105 * We currently use the existence of the threads array as an indicator that
2106 * tasks were spawned since the last barrier. If the structure is to be
2107 * useful outside the context of tasking, then this will have to change, but
2108 * not setting the field minimizes the performance impact of tasking on
2109 * barriers, when no explicit tasks were spawned (pushed, actually).
2110 */
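/*
 * Lifetime sketch (illustrative summary of the routines below): the master
 * thread creates or reuses a kmp_task_team_t in __kmp_task_team_setup() before
 * the barrier release phase; workers pick up the pointer in
 * __kmp_task_team_sync(); each thread drops its reference in
 * __kmp_unref_task_team(), and the final reference returns the struct to the
 * __kmp_free_task_teams list, which is reaped at shutdown by
 * __kmp_reap_task_teams().
 */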
2111
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002112
Jim Cownie5e8470a2013-09-27 10:38:44 +00002113static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2114// Lock for task team data structures
2115static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2116
2117
2118//------------------------------------------------------------------------------
2119// __kmp_alloc_task_deque:
2120// Allocates a task deque for a particular thread, and initialize the necessary
2121// data structures relating to the deque. This only happens once per thread
2122// per task team since task teams are recycled.
2123// No lock is needed during allocation since each thread allocates its own
2124// deque.
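// The deque itself is a fixed-size ring buffer of TASK_DEQUE_SIZE taskdata
// pointers; the push, remove and steal routines wrap head/tail indices with
// TASK_DEQUE_MASK, which relies on TASK_DEQUE_SIZE being a power of two
// (mask == size - 1).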
2125
2126static void
2127__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2128{
2129 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2130 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2131
2132 // Initialize last stolen task field to "none"
2133 thread_data -> td.td_deque_last_stolen = -1;
2134
2135 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2136 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2137 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2138
2139 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2140 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2141 // Allocate space for task deque, and zero the deque
2142 // Cannot use __kmp_thread_calloc() because threads not around for
2143 // kmp_reap_task_team( ).
2144 thread_data -> td.td_deque = (kmp_taskdata_t **)
2145 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2146}
2147
2148
2149//------------------------------------------------------------------------------
2150// __kmp_free_task_deque:
2151// Deallocates a task deque for a particular thread.
2152// Happens at library deallocation so don't need to reset all thread data fields.
2153
2154static void
2155__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2156{
2157 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2158
2159 if ( thread_data -> td.td_deque != NULL ) {
2160 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2161 __kmp_free( thread_data -> td.td_deque );
2162 thread_data -> td.td_deque = NULL;
2163 }
2164 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2165
2166#ifdef BUILD_TIED_TASK_STACK
2167 // GEH: Figure out what to do here for td_susp_tied_tasks
2168 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2169 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2170 }
2171#endif // BUILD_TIED_TASK_STACK
2172}
2173
2174
2175//------------------------------------------------------------------------------
2176// __kmp_realloc_task_threads_data:
2177// Allocates a threads_data array for a task team, either by allocating an initial
2178// array or enlarging an existing array. Only the first thread to get the lock
2179// allocates or enlarges the array and re-initializes the array elements.
2180// That thread returns "TRUE", the rest return "FALSE".
2181// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2182// The current size is given by task_team -> tt.tt_max_threads.
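// Growth pattern (as implemented below): the array only ever grows to the new
// tt_nproc; when it is already large enough the existing storage is reused and
// only the td_thr back-pointers are refreshed for the new team.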
2183
2184static int
2185__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2186{
2187 kmp_thread_data_t ** threads_data_p;
2188 kmp_int32 nthreads, maxthreads;
2189 int is_init_thread = FALSE;
2190
2191 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2192 // Already reallocated and initialized.
2193 return FALSE;
2194 }
2195
2196 threads_data_p = & task_team -> tt.tt_threads_data;
2197 nthreads = task_team -> tt.tt_nproc;
2198 maxthreads = task_team -> tt.tt_max_threads;
2199
2200 // All threads must lock when they encounter the first task of the implicit task
2201 // region to make sure threads_data fields are (re)initialized before used.
2202 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2203
2204 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2205 // first thread to enable tasking
2206 kmp_team_t *team = thread -> th.th_team;
2207 int i;
2208
2209 is_init_thread = TRUE;
2210 if ( maxthreads < nthreads ) {
2211
2212 if ( *threads_data_p != NULL ) {
2213 kmp_thread_data_t *old_data = *threads_data_p;
2214 kmp_thread_data_t *new_data = NULL;
2215
2216 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2217 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2218 __kmp_gtid_from_thread( thread ), task_team,
2219 nthreads, maxthreads ) );
2220 // Reallocate threads_data to have more elements than current array
2221 // Cannot use __kmp_thread_realloc() because threads not around for
2222 // kmp_reap_task_team( ). Note all new array entries are initialized
2223 // to zero by __kmp_allocate().
2224 new_data = (kmp_thread_data_t *)
2225 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2226 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002227 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002228 (void *) old_data,
2229                           maxthreads * sizeof(kmp_thread_data_t) ); // copy whole old elements, not just pointer-sized chunks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002230
2231#ifdef BUILD_TIED_TASK_STACK
2232 // GEH: Figure out if this is the right thing to do
2233 for (i = maxthreads; i < nthreads; i++) {
2234 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2235 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2236 }
2237#endif // BUILD_TIED_TASK_STACK
2238 // Install the new data and free the old data
2239 (*threads_data_p) = new_data;
2240 __kmp_free( old_data );
2241 }
2242 else {
2243 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2244 "threads data for task_team %p, size = %d\n",
2245 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2246 // Make the initial allocate for threads_data array, and zero entries
2247 // Cannot use __kmp_thread_calloc() because threads not around for
2248 // kmp_reap_task_team( ).
2249 *threads_data_p = (kmp_thread_data_t *)
2250 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2251#ifdef BUILD_TIED_TASK_STACK
2252 // GEH: Figure out if this is the right thing to do
2253 for (i = 0; i < nthreads; i++) {
2254 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2255 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2256 }
2257#endif // BUILD_TIED_TASK_STACK
2258 }
2259 task_team -> tt.tt_max_threads = nthreads;
2260 }
2261 else {
2262 // If array has (more than) enough elements, go ahead and use it
2263 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2264 }
2265
2266 // initialize threads_data pointers back to thread_info structures
2267 for (i = 0; i < nthreads; i++) {
2268 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2269 thread_data -> td.td_thr = team -> t.t_threads[i];
2270
2271 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2272 // The last stolen field survives across teams / barrier, and the number
2273 // of threads may have changed. It's possible (likely?) that a new
2274 // parallel region will exhibit the same behavior as the previous region.
2275 thread_data -> td.td_deque_last_stolen = -1;
2276 }
2277 }
2278
2279 KMP_MB();
2280 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2281 }
2282
2283 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2284 return is_init_thread;
2285}
2286
2287
2288//------------------------------------------------------------------------------
2289// __kmp_free_task_threads_data:
2290// Deallocates a threads_data array for a task team, including any attached
2291// tasking deques. Only occurs at library shutdown.
2292
2293static void
2294__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2295{
2296 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2297 if ( task_team -> tt.tt_threads_data != NULL ) {
2298 int i;
2299 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2300 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2301 }
2302 __kmp_free( task_team -> tt.tt_threads_data );
2303 task_team -> tt.tt_threads_data = NULL;
2304 }
2305 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2306}
2307
2308
2309//------------------------------------------------------------------------------
2310// __kmp_allocate_task_team:
2311// Allocates a task team associated with a specific team, taking it from
2312// the global task team free list if possible. Also initializes data structures.
2313
2314static kmp_task_team_t *
2315__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2316{
2317 kmp_task_team_t *task_team = NULL;
2318 int nthreads;
2319
2320 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2321 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2322
2323 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2324 // Take a task team from the task team pool
2325 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2326 if (__kmp_free_task_teams != NULL) {
2327 task_team = __kmp_free_task_teams;
2328 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2329 task_team -> tt.tt_next = NULL;
2330 }
2331 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2332 }
2333
2334 if (task_team == NULL) {
2335 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2336 "task team for team %p\n",
2337 __kmp_gtid_from_thread( thread ), team ) );
2338 // Allocate a new task team if one is not available.
2339 // Cannot use __kmp_thread_malloc() because threads not around for
2340 // kmp_reap_task_team( ).
2341 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2342 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2343 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2344 //task_team -> tt.tt_max_threads = 0;
2345 //task_team -> tt.tt_next = NULL;
2346 }
2347
2348 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002349#if OMP_41_ENABLED
2350 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2351#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2353
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2355 TCW_4( task_team -> tt.tt_active, TRUE );
2356 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2357
2358 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2359 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2360 return task_team;
2361}
2362
2363
2364//------------------------------------------------------------------------------
2365// __kmp_free_task_team:
2366// Frees the task team associated with a specific thread, and adds it
2367// to the global task team free list.
2368//
2369
2370static void
2371__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2372{
2373 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2374 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2375
2376 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2377
2378 // Put task team back on free list
2379 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2380
2381 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2382 task_team -> tt.tt_next = __kmp_free_task_teams;
2383 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2384 TCW_PTR(__kmp_free_task_teams, task_team);
2385
2386 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2387}
2388
2389
2390//------------------------------------------------------------------------------
2391// __kmp_reap_task_teams:
2392// Free all the task teams on the task team free list.
2393// Should only be done during library shutdown.
2394// Cannot do anything that needs a thread structure or gtid since they are already gone.
2395
2396void
2397__kmp_reap_task_teams( void )
2398{
2399 kmp_task_team_t *task_team;
2400
2401 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2402 // Free all task_teams on the free list
2403 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2404 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2405 __kmp_free_task_teams = task_team -> tt.tt_next;
2406 task_team -> tt.tt_next = NULL;
2407
2408 // Free threads_data if necessary
2409 if ( task_team -> tt.tt_threads_data != NULL ) {
2410 __kmp_free_task_threads_data( task_team );
2411 }
2412 __kmp_free( task_team );
2413 }
2414 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2415 }
2416}
2417
2418
2419//------------------------------------------------------------------------------
2420// __kmp_unref_task_team:
2421// Remove one thread's reference to the task team structure by
2422// decrementing the reference count, and deallocate the task team if there
2423// are no more references to it.
2424//
2425void
2426__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2427{
2428 kmp_uint ref_ct;
2429
2430 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2431
2432 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2433 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2434
2435
2436 if ( ref_ct == 0 ) {
2437 __kmp_free_task_team( thread, task_team );
2438 }
2439
2440 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2441}
2442
2443
2444//------------------------------------------------------------------------------
2445// __kmp_wait_to_unref_task_teams:
2446// Some threads could still be in the fork barrier release code, possibly
2447// trying to steal tasks. Wait for each thread to unreference its task team.
2448//
2449void
2450__kmp_wait_to_unref_task_teams(void)
2451{
2452 kmp_info_t *thread;
2453 kmp_uint32 spins;
2454 int done;
2455
2456 KMP_INIT_YIELD( spins );
2457
2458
2459 for (;;) {
2460 done = TRUE;
2461
2462        // TODO: GEH - this may be wrong because some sync would be necessary
2463 // in case threads are added to the pool during the traversal.
2464 // Need to verify that lock for thread pool is held when calling
2465 // this routine.
2466 for (thread = (kmp_info_t *)__kmp_thread_pool;
2467 thread != NULL;
2468 thread = thread->th.th_next_pool)
2469 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002470#if KMP_OS_WINDOWS
2471 DWORD exit_val;
2472#endif
2473 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2474 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2475 __kmp_gtid_from_thread( thread ) ) );
2476 continue;
2477 }
2478#if KMP_OS_WINDOWS
2479 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2480 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2481 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2482 __kmp_unref_task_team( thread->th.th_task_team, thread );
2483 }
2484 continue;
2485 }
2486#endif
2487
2488 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2489
2490 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2491 __kmp_gtid_from_thread( thread ) ) );
2492
2493 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002494 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002495 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002496 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002497 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2498 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002499 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002500 }
2501 }
2502 }
2503 if (done) {
2504 break;
2505 }
2506
2507 // If we are oversubscribed,
2508 // or have waited a bit (and library mode is throughput), yield.
2509 // Pause is in the following code.
2510 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2511 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2512 }
2513
2514
2515}
2516
2517
2518//------------------------------------------------------------------------------
2519// __kmp_task_team_setup: Create a task_team for the current team, but use
2520// an already created, unused one if it already exists.
2521// This may be called by any thread, but only for teams with # threads >1.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002522void
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002523__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002524{
2525 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2526
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002527 if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002528 // Allocate a new task team, which will be propagated to
2529 // all of the worker threads after the barrier. As they
2530        // spin in the barrier release phase, they will continue
2531 // to use the previous task team struct, until they receive
2532 // the signal to stop checking for tasks (they can't safely
2533 // reference the kmp_team_t struct, which could be reallocated
2534 // by the master thread).
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002535 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2536 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2537 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538 ((team != NULL) ? team->t.t_id : -1)) );
2539 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002540 //else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002541 // All threads have reported in, and no tasks were spawned
2542 // for this release->gather region. Leave the old task
2543 // team struct in place for the upcoming region. No task
2544 // teams are formed for serialized teams.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002545 if (both) {
2546 int other_team = 1 - this_thr->th.th_task_state;
2547 if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2548 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2549 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2550 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2551 ((team != NULL) ? team->t.t_id : -1)) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002552 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002553 }
2554}
2555
2556
2557//------------------------------------------------------------------------------
2558// __kmp_task_team_sync: Propagation of task team data from team to threads
2559// which happens just after the release phase of a team barrier. This may be
2560// called by any thread, but only for teams with # threads > 1.
2561
2562void
2563__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2564{
2565 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2566
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002567 // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002568 if ( this_thr->th.th_task_team != NULL ) {
2569 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2570 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2571 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002572 } else { // We are re-using a task team that was never enabled.
2573 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002574 }
2575 }
2576
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002577 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002578 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002579 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2580 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002581 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2582 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2583 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2584}
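/* A minimal sketch of the two-slot scheme used above (illustrative only): the
   team keeps two task_team pointers, indexed by each thread's th_task_state,
   so a fresh task team can be set up while the previous one is still being
   drained.

       // master, in __kmp_task_team_setup:
       //   team->t.t_task_team[ state ]      serves the current region
       //   team->t.t_task_team[ 1 - state ]  is prepared for the next region when 'both' is set

       // every thread, in __kmp_task_team_sync (after the release phase):
       //   this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
       //   this_thr->th.th_task_team  = team->t.t_task_team[ this_thr->th.th_task_state ];

   The roles of slot 0 and slot 1 therefore alternate from one barrier to the next. */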
2585
2586
2587//------------------------------------------------------------------------------
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002588// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002589// barrier gather phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created
Jim Cownie5e8470a2013-09-27 10:38:44 +00002590void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002591__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002592 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002593 )
2594{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002595 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596
2597 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2598 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2599
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002600 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002601 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2602 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002603 // All worker threads might have dropped through to the release phase, but could still
2604 // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2605 // only the master thread checks for the termination condition.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002606 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2607 flag.wait(this_thr, TRUE
2608 USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002610 // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2611 // They will deallocate it when the reference count reaches zero.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002612 // The master thread is not included in the ref count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2614 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002615#if OMP_41_ENABLED
2616 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2617 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2618#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002619 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002620#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2622 KMP_MB();
2623
2624 TCW_PTR(this_thr->th.th_task_team, NULL);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002625 team->t.t_task_team[this_thr->th.th_task_state] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002626 }
2627}
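/* Roughly what the master's wait above amounts to (an illustrative sketch; the
   real kmp_flag_32 wait also executes tasks and may sleep):

       // workers decrement tt_unfinished_threads as they finish with the task team;
       // the master can proceed once the counter reaches the flag's checker value, 0
       while ( TCR_4( task_team->tt.tt_unfinished_threads ) != 0 )
           KMP_YIELD( TRUE );
*/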
2628
2629
2630//------------------------------------------------------------------------------
2631// __kmp_tasking_barrier:
2632// Internal function to execute all tasks prior to a regular barrier or a
2633// join barrier. It is a full barrier itself, which unfortunately turns
2634// regular barriers into double barriers and join barriers into 1 1/2
2635// barriers.
 2636// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2637
2638void
2639__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2640{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002641 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002642 int flag = FALSE;
2643 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2644
2645#if USE_ITT_BUILD
2646 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2647#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002648 kmp_flag_32 spin_flag(spin, 0U);
2649 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2650 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002651#if USE_ITT_BUILD
2652 // TODO: What about itt_sync_obj??
2653 KMP_FSYNC_SPIN_PREPARE( spin );
2654#endif /* USE_ITT_BUILD */
2655
2656 if( TCR_4(__kmp_global.g.g_done) ) {
2657 if( __kmp_global.g.g_abort )
2658 __kmp_abort_thread( );
2659 break;
2660 }
2661 KMP_YIELD( TRUE ); // GH: We always yield here
2662 }
2663#if USE_ITT_BUILD
2664 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2665#endif /* USE_ITT_BUILD */
2666}
2667
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002668
2669#if OMP_41_ENABLED
2670
 2671/* __kmp_give_task puts a task into a given thread's queue if:
 2672 - the queue for that thread was created
2673 - there's space in that queue
2674
2675 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2676 */
2677static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2678{
2679 kmp_task_team_t * task_team = thread->th.th_task_team;
2680 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2681 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2682 bool result = false;
2683
2684 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2685
2686 // assert tasking is enabled? what if not?
2687 KMP_DEBUG_ASSERT( task_team != NULL );
2688
2689 if (thread_data -> td.td_deque == NULL ) {
2690 // There's no queue in this thread, go find another one
2691 // We're guaranteed that at least one thread has a queue
2692 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2693 return result;
2694 }
2695
2696 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2697 {
2698 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2699 return result;
2700 }
2701
2702 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2703
2704 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2705 {
2706 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2707 goto release_and_exit;
2708 }
2709
2710 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2711 // Wrap index.
2712 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2713 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2714
2715 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002716 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002717
2718release_and_exit:
2719 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2720
2721 return result;
2722}
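/* A minimal usage sketch (it mirrors the retry loop in
   __kmpc_proxy_task_completed_ooo further below): because __kmp_give_task can
   fail when the chosen thread has no deque or its deque is full, callers try
   other threads of the team until one accepts the task:

       kmp_int32 k = 0;
       do {
           k = ( k + 1 ) % team->t.t_nproc;
           thread = team->t.t_threads[ k ];
       } while ( !__kmp_give_task( thread, k, ptask ) );
*/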
2723
2724
 2725/* Finishing a proxy task is divided into two pieces:
 2726 - the top half, which can be done from a thread outside the team
 2727 - the bottom half, which must be run from a thread within the team
 2728
 2729 In order to run the bottom half, the task gets queued back into one of the threads of the team.
 2730 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
 2731 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
 2732 - things that can be run before queuing the bottom half
 2733 - things that must be run after queuing the bottom half
 2734
 2735 This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this,
 2736 we use the td_incomplete_child_tasks counter of the proxy task itself to synchronize the top and bottom halves.
2737*/
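/* A sketch of the resulting call ordering (illustrative; see the entry points
   at the end of this file):

       in-team completion (__kmpc_proxy_task_completed):
           __kmp_first_top_half_finish_proxy( taskdata );
           __kmp_second_top_half_finish_proxy( taskdata );
           __kmp_bottom_half_finish_proxy( gtid, ptask );

       out-of-team completion (__kmpc_proxy_task_completed_ooo):
           __kmp_first_top_half_finish_proxy( taskdata );
           __kmp_give_task( thread, k, ptask );         // retried until some team thread accepts it
           __kmp_second_top_half_finish_proxy( taskdata );
           // __kmp_bottom_half_finish_proxy() runs later on the chosen team thread

   The imaginary child added by the first top half keeps the bottom half from
   freeing the task until the second top half has finished. */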
2738
2739static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2740{
2741 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2742 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2743 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2744 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2745
2746 taskdata -> td_flags.complete = 1; // mark the task as completed
2747
2748 if ( taskdata->td_taskgroup )
2749 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2750
 2751 // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2752 TCR_4(taskdata->td_incomplete_child_tasks++);
2753}
2754
2755static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2756{
2757 kmp_int32 children = 0;
2758
2759 // Predecrement simulated by "- 1" calculation
2760 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2761 KMP_DEBUG_ASSERT( children >= 0 );
2762
 2763 // Remove the imaginary child
2764 TCR_4(taskdata->td_incomplete_child_tasks--);
2765}
2766
2767static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2768{
2769 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2770 kmp_info_t * thread = __kmp_threads[ gtid ];
2771
2772 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2773 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2774
2775 // We need to wait to make sure the top half is finished
2776 // Spinning here should be ok as this should happen quickly
2777 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2778
2779 __kmp_release_deps(gtid,taskdata);
2780 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2781}
2782
2783/*!
2784@ingroup TASKING
2785@param gtid Global Thread ID of encountering thread
 2786@param ptask Task whose execution is completed
 2787
 2788Execute the completion of a proxy task from a thread that is part of the team. Runs both top halves and the bottom half directly.
2789*/
2790void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2791{
2792 KMP_DEBUG_ASSERT( ptask != NULL );
2793 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2794 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2795
2796 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2797
2798 __kmp_first_top_half_finish_proxy(taskdata);
2799 __kmp_second_top_half_finish_proxy(taskdata);
2800 __kmp_bottom_half_finish_proxy(gtid,ptask);
2801
2802 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2803}
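/* A hypothetical usage sketch (illustrative only; the detection mechanism and
   the variables shown are assumptions, not part of this file): a thread of the
   team that learns the asynchronous work behind a proxy task has finished can
   complete it in place:

       // gtid: global id of the completing team thread, ptask: the proxy task
       __kmpc_proxy_task_completed( gtid, ptask );
*/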
2804
2805/*!
2806@ingroup TASKING
 2807@param ptask Task whose execution is completed
 2808
 2809Execute the completion of a proxy task from a thread that need not belong to the team.
2810*/
2811void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2812{
2813 KMP_DEBUG_ASSERT( ptask != NULL );
2814 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2815
2816 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2817
2818 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2819
2820 __kmp_first_top_half_finish_proxy(taskdata);
2821
 2822 // Enqueue task to complete bottom half completion from a thread within the corresponding team
2823 kmp_team_t * team = taskdata->td_team;
2824 kmp_int32 nthreads = team->t.t_nproc;
2825 kmp_info_t *thread;
2826 kmp_int32 k = 0;
2827
2828 do {
Jonathan Peyton1406f012015-05-22 22:35:51 +00002829 // This should be similar to k = __kmp_get_random( thread ) % nthreads, but we cannot use __kmp_get_random here
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002830 // For now we're just linearly trying to find a thread
2831 k = (k+1) % nthreads;
2832 thread = team->t.t_threads[k];
2833 } while ( !__kmp_give_task( thread, k, ptask ) );
2834
2835 __kmp_second_top_half_finish_proxy(taskdata);
2836
2837 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2838}
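/* A hypothetical usage sketch (illustrative only; the callback machinery shown
   is an assumption, not part of this file): an external agent, e.g. an offload
   runtime's completion callback running on a thread outside the team, can
   signal the proxy task like this:

       static void my_async_done_callback( void * arg )
       {
           kmp_task_t * ptask = (kmp_task_t *) arg;    // the proxy task handle
           __kmpc_proxy_task_completed_ooo( ptask );   // callable from a thread outside the team
       }
*/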
2839
2840#endif