/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif


/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declarations */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}
46
47#ifdef BUILD_TIED_TASK_STACK
48
49//---------------------------------------------------------------------------
// __kmp_trace_task_stack: print the tied tasks from the task stack in order
// from top to bottom
52//
53// gtid: global thread identifier for thread containing stack
54// thread_data: thread data for task team thread containing stack
55// threshold: value above which the trace statement triggers
56// location: string identifying call site of this function (for trace)
57
58static void
59__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
60{
61 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
62 kmp_taskdata_t **stack_top = task_stack -> ts_top;
63 kmp_int32 entries = task_stack -> ts_entries;
64 kmp_taskdata_t *tied_task;
65
66 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
67 "first_block = %p, stack_top = %p \n",
68 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
69
70 KMP_DEBUG_ASSERT( stack_top != NULL );
71 KMP_DEBUG_ASSERT( entries > 0 );
72
73 while ( entries != 0 )
74 {
75 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
76 // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
78 {
79 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
80
81 stack_block = stack_block -> sb_prev;
82 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
83 }
84
85 // finish bookkeeping
86 stack_top--;
87 entries--;
88
89 tied_task = * stack_top;
90
91 KMP_DEBUG_ASSERT( tied_task != NULL );
92 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
93
94 KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
95 "stack_top=%p, tied_task=%p\n",
96 location, gtid, entries, stack_top, tied_task ) );
97 }
98 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
99
100 KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
101 location, gtid ) );
102}
103
104//---------------------------------------------------------------------------
105// __kmp_init_task_stack: initialize the task stack for the first time
106// after a thread_data structure is created.
107// It should not be necessary to do this again (assuming the stack works).
108//
109// gtid: global thread identifier of calling thread
110// thread_data: thread data for task team thread containing stack
111
112static void
113__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
114{
115 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
116 kmp_stack_block_t *first_block;
117
118 // set up the first block of the stack
119 first_block = & task_stack -> ts_first_block;
120 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
121 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
122
123 // initialize the stack to be empty
124 task_stack -> ts_entries = TASK_STACK_EMPTY;
125 first_block -> sb_next = NULL;
126 first_block -> sb_prev = NULL;
127}
128
129
130//---------------------------------------------------------------------------
131// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
132//
133// gtid: global thread identifier for calling thread
134// thread_data: thread info for thread containing stack
135
136static void
137__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
138{
139 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
140 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
141
142 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
143 // free from the second block of the stack
144 while ( stack_block != NULL ) {
145 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
146
147 stack_block -> sb_next = NULL;
148 stack_block -> sb_prev = NULL;
149 if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
151 }
152 stack_block = next_block;
153 }
154 // initialize the stack to be empty
155 task_stack -> ts_entries = 0;
156 task_stack -> ts_top = NULL;
157}
158
159
160//---------------------------------------------------------------------------
161// __kmp_push_task_stack: Push the tied task onto the task stack.
162// Grow the stack if necessary by allocating another block.
163//
164// gtid: global thread identifier for calling thread
165// thread: thread info for thread containing stack
166// tied_task: the task to push on the stack
167
168static void
169__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
170{
171 // GEH - need to consider what to do if tt_threads_data not allocated yet
172 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
173 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
174 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
175
176 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
177 return; // Don't push anything on stack if team or team tasks are serialized
178 }
179
180 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
181 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
182
183 KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
184 gtid, thread, tied_task ) );
185 // Store entry
186 * (task_stack -> ts_top) = tied_task;
187
188 // Do bookkeeping for next push
189 task_stack -> ts_top++;
190 task_stack -> ts_entries++;
191
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
193 {
194 // Find beginning of this task block
195 kmp_stack_block_t *stack_block =
196 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
197
198 // Check if we already have a block
199 if ( stack_block -> sb_next != NULL )
200 { // reset ts_top to beginning of next block
201 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
202 }
203 else
204 { // Alloc new block and link it up
205 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
206 __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
207
208 task_stack -> ts_top = & new_block -> sb_block[0];
209 stack_block -> sb_next = new_block;
210 new_block -> sb_prev = stack_block;
211 new_block -> sb_next = NULL;
212
213 KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
214 gtid, tied_task, new_block ) );
215 }
216 }
217 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
218}
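
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): a minimal, self-contained
// model of the block-chained stack maintained by __kmp_push_task_stack /
// __kmp_pop_task_stack above. Type and helper names are hypothetical; the real
// code tracks a ts_top pointer rather than an integer entry count, but the
// block linking and mask arithmetic are the same idea.

#if 0
/* needs <stdlib.h> and <assert.h> if compiled stand-alone */
#define SB_SIZE 8                                    /* entries per block (power of two) */

typedef struct sketch_block {
    void                *slot[SB_SIZE];
    struct sketch_block *next;
    struct sketch_block *prev;
} sketch_block_t;

typedef struct {
    sketch_block_t  first;                           /* first block lives inline, like ts_first_block */
    sketch_block_t *top_block;
    int             entries;
} sketch_stack_t;

static void sketch_init( sketch_stack_t *s )
{
    s->first.next = s->first.prev = NULL;
    s->top_block  = &s->first;
    s->entries    = 0;
}

static void sketch_push( sketch_stack_t *s, void *item )
{
    s->top_block->slot[ s->entries & (SB_SIZE - 1) ] = item;
    s->entries++;
    if ( (s->entries & (SB_SIZE - 1)) == 0 ) {       /* current block just filled up */
        if ( s->top_block->next == NULL ) {          /* allocate and link a new block */
            sketch_block_t *nb = (sketch_block_t *) calloc( 1, sizeof(sketch_block_t) );
            nb->prev = s->top_block;
            s->top_block->next = nb;
        }
        s->top_block = s->top_block->next;
    }
}

static void *sketch_pop( sketch_stack_t *s )
{
    assert( s->entries > 0 );
    if ( (s->entries & (SB_SIZE - 1)) == 0 )         /* top sits at the start of a block: step back */
        s->top_block = s->top_block->prev;
    s->entries--;
    return s->top_block->slot[ s->entries & (SB_SIZE - 1) ];
}
#endif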
219
220//---------------------------------------------------------------------------
221// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
222// the task, just check to make sure it matches the ending task passed in.
223//
224// gtid: global thread identifier for the calling thread
225// thread: thread info structure containing stack
226// tied_task: the task popped off the stack
227// ending_task: the task that is ending (should match popped task)
228
229static void
230__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
231{
232 // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
234 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
235 kmp_taskdata_t *tied_task;
236
237 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
238 return; // Don't pop anything from stack if team or team tasks are serialized
239 }
240
241 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
242 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
243
244 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
245
246 // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
248 {
249 kmp_stack_block_t *stack_block =
250 (kmp_stack_block_t *) (task_stack -> ts_top) ;
251
252 stack_block = stack_block -> sb_prev;
253 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
254 }
255
256 // finish bookkeeping
257 task_stack -> ts_top--;
258 task_stack -> ts_entries--;
259
260 tied_task = * (task_stack -> ts_top );
261
262 KMP_DEBUG_ASSERT( tied_task != NULL );
263 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
264 KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
265
266 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
267 return;
268}
269#endif /* BUILD_TIED_TASK_STACK */
270
271//---------------------------------------------------
272// __kmp_push_task: Add a task to the thread's deque
273
274static kmp_int32
275__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
276{
277 kmp_info_t * thread = __kmp_threads[ gtid ];
278 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
279 kmp_task_team_t * task_team = thread->th.th_task_team;
280 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
281 kmp_thread_data_t * thread_data;
282
283 KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
284
285 // The first check avoids building task_team thread data if serialized
286 if ( taskdata->td_flags.task_serial ) {
287 KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
288 gtid, taskdata ) );
289 return TASK_NOT_PUSHED;
290 }
291
292 // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
293 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000294 if ( ! KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000295 __kmp_enable_tasking( task_team, thread );
296 }
297 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
298 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
299
300 // Find tasking deque specific to encountering thread
301 thread_data = & task_team -> tt.tt_threads_data[ tid ];
302
303 // No lock needed since only owner can allocate
304 if (thread_data -> td.td_deque == NULL ) {
305 __kmp_alloc_task_deque( thread, thread_data );
306 }
307
308 // Check if deque is full
309 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
310 {
311 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
312 gtid, taskdata ) );
313 return TASK_NOT_PUSHED;
314 }
315
316 // Lock the deque for the task push operation
317 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
318
#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread other than the calling thread can add tasks
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
#endif

333 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
334 // Wrap index.
335 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
336 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
337
338 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
339
340 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
341 "task=%p ntasks=%d head=%u tail=%u\n",
342 gtid, taskdata, thread_data->td.td_deque_ntasks,
343 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
344
345 return TASK_SUCCESSFULLY_PUSHED;
346}
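
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): the per-thread deque used
// above is a power-of-two ring buffer, so head/tail wrap with a mask instead of a
// modulo. The constants and struct below are hypothetical stand-ins for
// TASK_DEQUE_SIZE / TASK_DEQUE_MASK and kmp_thread_data_t from kmp.h.

#if 0
#define SKETCH_DEQUE_SIZE 256u                        /* must be a power of two */
#define SKETCH_DEQUE_MASK (SKETCH_DEQUE_SIZE - 1)

typedef struct {
    void      *slots[SKETCH_DEQUE_SIZE];
    unsigned   head;                                  /* thieves steal from the head */
    unsigned   tail;                                  /* the owning thread pushes at the tail */
    unsigned   ntasks;                                /* current element count */
} sketch_deque_t;

/* Owner-side push, mirroring the tail update in __kmp_push_task above:
   store the task, then advance the tail with a mask so the index wraps. */
static int sketch_deque_push( sketch_deque_t *d, void *task )
{
    if ( d->ntasks >= SKETCH_DEQUE_SIZE )
        return 0;                                     /* full: the caller executes the task itself */
    d->slots[ d->tail ] = task;
    d->tail = ( d->tail + 1 ) & SKETCH_DEQUE_MASK;    /* wrap index */
    d->ntasks++;                                      /* the real code does this under td_deque_lock */
    return 1;
}
#endif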
347
348
349//-----------------------------------------------------------------------------------------
350// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
351// this_thr: thread structure to set current_task in.
352
353void
354__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
355{
356 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
357 "curtask_parent=%p\n",
358 0, this_thr, this_thr -> th.th_current_task,
359 this_thr -> th.th_current_task -> td_parent ) );
360
361 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
362
363 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
364 "curtask_parent=%p\n",
365 0, this_thr, this_thr -> th.th_current_task,
366 this_thr -> th.th_current_task -> td_parent ) );
367}
368
369
370//---------------------------------------------------------------------------------------
371// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
372// this_thr: thread structure to set up
373// team: team for implicit task data
374// tid: thread within team to set up
375
376void
377__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
378{
379 // current task of the thread is a parent of the new just created implicit tasks of new team
380 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
381 "parent_task=%p\n",
382 tid, this_thr, this_thr->th.th_current_task,
383 team->t.t_implicit_task_taskdata[tid].td_parent ) );
384
385 KMP_DEBUG_ASSERT (this_thr != NULL);
386
387 if( tid == 0 ) {
388 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
389 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
390 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
391 }
392 } else {
393 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
394 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
395 }
396
397 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
398 "parent_task=%p\n",
399 tid, this_thr, this_thr->th.th_current_task,
400 team->t.t_implicit_task_taskdata[tid].td_parent ) );
401}
402
403
404//----------------------------------------------------------------------
405// __kmp_task_start: bookkeeping for a task starting execution
406// GTID: global thread id of calling thread
407// task: task starting execution
408// current_task: task suspending
409
410static void
411__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
412{
413 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
414 kmp_info_t * thread = __kmp_threads[ gtid ];
415
416 KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
417 gtid, taskdata, current_task) );
418
419 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
420
421 // mark currently executing task as suspended
422 // TODO: GEH - make sure root team implicit task is initialized properly.
423 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
424 current_task -> td_flags.executing = 0;
425
426 // Add task to stack if tied
427#ifdef BUILD_TIED_TASK_STACK
428 if ( taskdata -> td_flags.tiedness == TASK_TIED )
429 {
430 __kmp_push_task_stack( gtid, thread, taskdata );
431 }
432#endif /* BUILD_TIED_TASK_STACK */
433
434 // mark starting task as executing and as current task
435 thread -> th.th_current_task = taskdata;
436
437 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
438 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
439 taskdata -> td_flags.started = 1;
440 taskdata -> td_flags.executing = 1;
441 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
442 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
443
444 // GEH TODO: shouldn't we pass some sort of location identifier here?
445 // APT: yes, we will pass location here.
446 // need to store current thread state (in a thread or taskdata structure)
447 // before setting work_state, otherwise wrong state is set after end of task
448
449 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
450 gtid, taskdata ) );
451
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000452#if OMPT_SUPPORT
453 if ((ompt_status == ompt_status_track_callback) &&
454 ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
455 kmp_taskdata_t *parent = taskdata->td_parent;
456 ompt_callbacks.ompt_callback(ompt_event_task_begin)(
457 parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
458 parent ? &(parent->ompt_task_info.frame) : NULL,
459 taskdata->ompt_task_info.task_id,
460 taskdata->ompt_task_info.function);
461 }
462#endif
463
Jim Cownie5e8470a2013-09-27 10:38:44 +0000464 return;
465}
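
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): the shape of a tool-side
// callback matching the ompt_event_task_begin invocation made in __kmp_task_start
// above. The callback name is hypothetical and registration is omitted; only the
// argument order reflects what this file actually passes.

#if 0
static void my_tool_task_begin_callback(
    ompt_task_id_t  parent_task_id,      /* id of the creating task, or ompt_task_id_none */
    ompt_frame_t   *parent_task_frame,   /* parent's frame info, may be NULL               */
    ompt_task_id_t  new_task_id,         /* id of the task that is starting                */
    void           *task_function )      /* entry point stored in ompt_task_info.function  */
{
    /* tool-specific bookkeeping would go here */
}
#endif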
466
467
468//----------------------------------------------------------------------
469// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
470// loc_ref: source location information; points to beginning of task block.
471// gtid: global thread number.
472// task: task thunk for the started task.
473
474void
475__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
476{
477 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
478 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
479
480 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
481 gtid, loc_ref, taskdata, current_task ) );
482
483 taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
484 __kmp_task_start( gtid, task, current_task );
485
486 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
487 gtid, loc_ref, taskdata ) );
488
489 return;
490}
491
492#ifdef TASK_UNUSED
493//----------------------------------------------------------------------
494// __kmpc_omp_task_begin: report that a given task has started execution
495// NEVER GENERATED BY COMPILER, DEPRECATED!!!
496
497void
498__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
499{
500 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
501
502 KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
503 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
504
505 __kmp_task_start( gtid, task, current_task );
506
507 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
508 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
509
510 return;
511}
512#endif // TASK_UNUSED
513
514
515//-------------------------------------------------------------------------------------
516// __kmp_free_task: free the current task space and the space for shareds
517// gtid: Global thread ID of calling thread
518// taskdata: task to free
519// thread: thread data structure of caller
520
521static void
522__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
523{
524 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
525 gtid, taskdata) );
526
527 // Check to make sure all flags and counters have the correct values
528 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
529 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
530 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
531 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
532 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
533 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
534
535 taskdata->td_flags.freed = 1;
536 // deallocate the taskdata and shared variable blocks associated with this task
537 #if USE_FAST_MEMORY
538 __kmp_fast_free( thread, taskdata );
539 #else /* ! USE_FAST_MEMORY */
540 __kmp_thread_free( thread, taskdata );
541 #endif
542
543 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
544 gtid, taskdata) );
545}
546
547//-------------------------------------------------------------------------------------
548// __kmp_free_task_and_ancestors: free the current task and ancestors without children
549//
550// gtid: Global thread ID of calling thread
551// taskdata: task to free
552// thread: thread data structure of caller
553
554static void
555__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
556{
557 kmp_int32 children = 0;
558 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
559
560 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
561
562 if ( !team_or_tasking_serialized ) {
563 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
564 KMP_DEBUG_ASSERT( children >= 0 );
565 }
566
567 // Now, go up the ancestor tree to see if any ancestors can now be freed.
568 while ( children == 0 )
569 {
570 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
571
572 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
573 "and freeing itself\n", gtid, taskdata) );
574
575 // --- Deallocate my ancestor task ---
576 __kmp_free_task( gtid, taskdata, thread );
577
578 taskdata = parent_taskdata;
579
580 // Stop checking ancestors at implicit task or if tasking serialized
581 // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
582 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
583 return;
584
585 if ( !team_or_tasking_serialized ) {
586 // Predecrement simulated by "- 1" calculation
587 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
588 KMP_DEBUG_ASSERT( children >= 0 );
589 }
590 }
591
592 KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
593 "not freeing it yet\n", gtid, taskdata, children) );
594}
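
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): why the "- 1" follows
// KMP_TEST_THEN_DEC32 above. The primitive returns the value *before* the
// decrement, so subtracting one yields the post-decrement child count. Shown here
// with a generic GCC builtin rather than the runtime's macro.

#if 0
/* needs <assert.h> if compiled stand-alone */
static int release_child_reference( volatile int *allocated_child_tasks )
{
    /* __sync_fetch_and_sub returns the old value, like KMP_TEST_THEN_DEC32,
       so "old - 1" is the number of references that remain after this call. */
    int remaining = __sync_fetch_and_sub( allocated_child_tasks, 1 ) - 1;
    assert( remaining >= 0 );
    return remaining;              /* 0 means the task (and possibly ancestors) can be freed */
}
#endif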
595
596//---------------------------------------------------------------------
597// __kmp_task_finish: bookkeeping to do when a task finishes execution
598// gtid: global thread ID for calling thread
599// task: task to be finished
600// resumed_task: task to be resumed. (may be NULL if task is serialized)
601
602static void
603__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
604{
605 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
606 kmp_info_t * thread = __kmp_threads[ gtid ];
607 kmp_int32 children = 0;
608
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000609#if OMPT_SUPPORT
610 if ((ompt_status == ompt_status_track_callback) &&
611 ompt_callbacks.ompt_callback(ompt_event_task_end)) {
612 kmp_taskdata_t *parent = taskdata->td_parent;
613 ompt_callbacks.ompt_callback(ompt_event_task_end)(
614 taskdata->ompt_task_info.task_id);
615 }
616#endif
617
Jim Cownie5e8470a2013-09-27 10:38:44 +0000618 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
619 gtid, taskdata, resumed_task) );
620
621 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
622
623 // Pop task from stack if tied
624#ifdef BUILD_TIED_TASK_STACK
625 if ( taskdata -> td_flags.tiedness == TASK_TIED )
626 {
627 __kmp_pop_task_stack( gtid, thread, taskdata );
628 }
629#endif /* BUILD_TIED_TASK_STACK */
630
Jim Cownie5e8470a2013-09-27 10:38:44 +0000631 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000632 taskdata -> td_flags.complete = 1; // mark the task as completed
633 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
634 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
635
636 // Only need to keep track of count if team parallel and tasking not serialized
637 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
638 // Predecrement simulated by "- 1" calculation
639 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
640 KMP_DEBUG_ASSERT( children >= 0 );
641#if OMP_40_ENABLED
642 if ( taskdata->td_taskgroup )
643 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
Jim Cownie181b4bb2013-12-23 17:28:57 +0000644 __kmp_release_deps(gtid,taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000645#endif
646 }
647
    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag will be reset to 1 again by this same function.
651 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
652 taskdata -> td_flags.executing = 0; // suspend the finishing task
653
Jim Cownie5e8470a2013-09-27 10:38:44 +0000654 KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
655 gtid, taskdata, children) );
656
#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here since, at this point, other tasks might have been released,
       hence overlapping the destructor invocations with some other work in the
       released tasks. The OpenMP spec is not specific about when the destructors are
       invoked, so we should be free to choose.
    */
665 if (taskdata->td_flags.destructors_thunk) {
666 kmp_routine_entry_t destr_thunk = task->destructors;
667 KMP_ASSERT(destr_thunk);
668 destr_thunk(gtid, task);
669 }
670#endif // OMP_40_ENABLED
671
Jim Cownie5e8470a2013-09-27 10:38:44 +0000672 // bookkeeping for resuming task:
673 // GEH - note tasking_ser => task_serial
674 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
675 taskdata->td_flags.task_serial);
676 if ( taskdata->td_flags.task_serial )
677 {
678 if (resumed_task == NULL) {
679 resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
680 }
681 else {
682 // verify resumed task passed in points to parent
683 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
684 }
685 }
686 else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );        // verify that resumed task is passed as argument
688 }
689
690 // Free this task and then ancestor tasks if they have no children.
691 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
692
    // FIXME johnmc: I think this statement should be before the last one so that if an
    // asynchronous inquiry peers into the runtime system it doesn't see the freed
    // task as the current task.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000696 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
697
698 // TODO: GEH - make sure root team implicit task is initialized properly.
699 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
700 resumed_task->td_flags.executing = 1; // resume previous task
701
702 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
703 gtid, taskdata, resumed_task) );
704
705 return;
706}
707
708//---------------------------------------------------------------------
709// __kmpc_omp_task_complete_if0: report that a task has completed execution
710// loc_ref: source location information; points to end of task block.
711// gtid: global thread number.
712// task: task thunk for the completed task.
713
714void
715__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
716{
717 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
718 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
719
720 __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
721
722 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
723 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
724
725 return;
726}
727
728#ifdef TASK_UNUSED
729//---------------------------------------------------------------------
730// __kmpc_omp_task_complete: report that a task has completed execution
731// NEVER GENERATED BY COMPILER, DEPRECATED!!!
732
733void
734__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
735{
736 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
737 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
738
739 __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
740
741 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
742 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
743 return;
744}
745#endif // TASK_UNUSED
746
747
Andrey Churbanove5f44922015-04-29 16:22:07 +0000748#if OMPT_SUPPORT
749//----------------------------------------------------------------------------------------------------
750// __kmp_task_init_ompt:
751// Initialize OMPT fields maintained by a task. Since the serial task is initialized before
752// ompt_initialize is called, at the point the serial task is initialized we don't know whether
753// OMPT will be used or not when the serial task is initialized. This function provides the support
754// needed to initialize OMPT for the serial task after the fact.
755
756void
757__kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
758{
759 task->ompt_task_info.task_id = __ompt_task_id_new(tid);
760 task->ompt_task_info.function = NULL;
761 task->ompt_task_info.frame = (ompt_frame_t) {
762 .exit_runtime_frame = NULL,
763 .reenter_runtime_frame = NULL
764 };
765}
766#endif
767
768
Jim Cownie5e8470a2013-09-27 10:38:44 +0000769//----------------------------------------------------------------------------------------------------
770// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
771//
772// loc_ref: reference to source location of parallel region
773// this_thr: thread data structure corresponding to implicit task
774// team: team for this_thr
775// tid: thread id of given thread within team
776// set_curr_task: TRUE if need to push current task to thread
777// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
778// TODO: Get better loc_ref. Value passed in may be NULL
779
780void
781__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
782{
783 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
784
785 KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
786 tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
787
788 task->td_task_id = KMP_GEN_TASK_ID();
789 task->td_team = team;
790// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
791 task->td_ident = loc_ref;
792 task->td_taskwait_ident = NULL;
793 task->td_taskwait_counter = 0;
794 task->td_taskwait_thread = 0;
795
796 task->td_flags.tiedness = TASK_TIED;
797 task->td_flags.tasktype = TASK_IMPLICIT;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000798#if OMP_41_ENABLED
799 task->td_flags.proxy = TASK_FULL;
800#endif
801
Jim Cownie5e8470a2013-09-27 10:38:44 +0000802 // All implicit tasks are executed immediately, not deferred
803 task->td_flags.task_serial = 1;
804 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
805 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
806
807 task->td_flags.started = 1;
808 task->td_flags.executing = 1;
809 task->td_flags.complete = 0;
810 task->td_flags.freed = 0;
811
Jim Cownie181b4bb2013-12-23 17:28:57 +0000812#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +0000813 task->td_dephash = NULL;
814 task->td_depnode = NULL;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000815#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000816
817 if (set_curr_task) { // only do this initialization the first time a thread is created
818 task->td_incomplete_child_tasks = 0;
819 task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
820#if OMP_40_ENABLED
821 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
822#endif
823 __kmp_push_current_task_to_thread( this_thr, team, tid );
824 } else {
825 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
826 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
827 }
828
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000829#if OMPT_SUPPORT
830 __kmp_task_init_ompt(task, tid);
831#endif
832
Jim Cownie5e8470a2013-09-27 10:38:44 +0000833 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
834 tid, team, task ) );
835}
836
// Round up a size to a multiple of val, where val is a power of two
838// Used to insert padding between structures co-allocated using a single malloc() call
839static size_t
840__kmp_round_up_to_val( size_t size, size_t val ) {
841 if ( size & ( val - 1 ) ) {
842 size &= ~ ( val - 1 );
843 if ( size <= KMP_SIZE_T_MAX - val ) {
844 size += val; // Round up if there is no overflow.
845 }; // if
846 }; // if
847 return size;
} // __kmp_round_up_to_val
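
//---------------------------------------------------------------------------
// Worked example (not part of the runtime build): with val = sizeof(void *) = 8 on
// a 64-bit target, __kmp_round_up_to_val(61, 8) clears the low bits (61 -> 56) and
// adds 8, returning 64; a size that is already a multiple of 8 is returned unchanged.

#if 0
static void round_up_examples( void )
{
    size_t a = __kmp_round_up_to_val( 61, sizeof(void *) );   /* 61 -> 64            */
    size_t b = __kmp_round_up_to_val( 64, sizeof(void *) );   /* already aligned: 64 */
    (void) a; (void) b;
}
#endif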
849
850
851//---------------------------------------------------------------------------------
852// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
853//
854// loc_ref: source location information
855// gtid: global thread number.
856// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
857// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
858// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
859// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
860// task_entry: Pointer to task code entry point generated by compiler.
861// returns: a pointer to the allocated kmp_task_t structure (task).
862
863kmp_task_t *
864__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
865 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
866 kmp_routine_entry_t task_entry )
867{
868 kmp_task_t *task;
869 kmp_taskdata_t *taskdata;
870 kmp_info_t *thread = __kmp_threads[ gtid ];
871 kmp_team_t *team = thread->th.th_team;
872 kmp_taskdata_t *parent_task = thread->th.th_current_task;
873 size_t shareds_offset;
874
875 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
876 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
877 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
878 sizeof_shareds, task_entry) );
879
880 if ( parent_task->td_flags.final ) {
881 if (flags->merged_if0) {
882 }
883 flags->final = 1;
884 }
885
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000886#if OMP_41_ENABLED
887 if ( flags->proxy == TASK_PROXY ) {
888 flags->tiedness = TASK_UNTIED;
889 flags->merged_if0 = 1;
890
891 /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
892 if ( (thread->th.th_task_team) == NULL ) {
893 /* This should only happen if the team is serialized
894 setup a task team and propagate it to the thread
895 */
896 KMP_DEBUG_ASSERT(team->t.t_serialized);
897 KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
898 __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads
899 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
900 }
901 kmp_task_team_t * task_team = thread->th.th_task_team;
902
903 /* tasking must be enabled now as the task might not be pushed */
904 if ( !KMP_TASKING_ENABLED( task_team ) ) {
905 KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
906 __kmp_enable_tasking( task_team, thread );
907 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
908 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
909 // No lock needed since only owner can allocate
910 if (thread_data -> td.td_deque == NULL ) {
911 __kmp_alloc_task_deque( thread, thread_data );
912 }
913 }
914
915 if ( task_team->tt.tt_found_proxy_tasks == FALSE )
916 TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
917 }
918#endif
919
Jim Cownie5e8470a2013-09-27 10:38:44 +0000920 // Calculate shared structure offset including padding after kmp_task_t struct
921 // to align pointers in shared struct
922 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
923 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
924
925 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
926 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
927 gtid, shareds_offset) );
928 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
929 gtid, sizeof_shareds) );
930
931 // Avoid double allocation here by combining shareds with taskdata
932 #if USE_FAST_MEMORY
933 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
934 #else /* ! USE_FAST_MEMORY */
935 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
936 #endif /* USE_FAST_MEMORY */
937
938 task = KMP_TASKDATA_TO_TASK(taskdata);
939
940 // Make sure task & taskdata are aligned appropriately
Andrey Churbanovd1c55042015-01-19 18:29:35 +0000941#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
Jim Cownie5e8470a2013-09-27 10:38:44 +0000942 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
943 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
944#else
945 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
946 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
947#endif
948 if (sizeof_shareds > 0) {
949 // Avoid double allocation here by combining shareds with taskdata
950 task->shareds = & ((char *) taskdata)[ shareds_offset ];
951 // Make sure shareds struct is aligned to pointer size
952 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
953 } else {
954 task->shareds = NULL;
955 }
956 task->routine = task_entry;
957 task->part_id = 0; // AC: Always start with 0 part id
958
959 taskdata->td_task_id = KMP_GEN_TASK_ID();
960 taskdata->td_team = team;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000961 taskdata->td_alloc_thread = thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000962 taskdata->td_parent = parent_task;
963 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
964 taskdata->td_ident = loc_ref;
965 taskdata->td_taskwait_ident = NULL;
966 taskdata->td_taskwait_counter = 0;
967 taskdata->td_taskwait_thread = 0;
968 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000969#if OMP_41_ENABLED
970 // avoid copying icvs for proxy tasks
971 if ( flags->proxy == TASK_FULL )
972#endif
973 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000974
975 taskdata->td_flags.tiedness = flags->tiedness;
976 taskdata->td_flags.final = flags->final;
977 taskdata->td_flags.merged_if0 = flags->merged_if0;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000978#if OMP_40_ENABLED
979 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
980#endif // OMP_40_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000981#if OMP_41_ENABLED
982 taskdata->td_flags.proxy = flags->proxy;
983#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000984 taskdata->td_flags.tasktype = TASK_EXPLICIT;
985
986 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
987 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
988
989 // GEH - TODO: fix this to copy parent task's value of team_serial flag
990 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
991
992 // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
993 // tasks are not left until program termination to execute. Also, it helps locality to execute
994 // immediately.
Jonathan Peyton7881aa12015-05-21 21:16:38 +0000995 taskdata->td_flags.task_serial = ( parent_task->td_flags.final
Jim Cownie5e8470a2013-09-27 10:38:44 +0000996 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
997
998 taskdata->td_flags.started = 0;
999 taskdata->td_flags.executing = 0;
1000 taskdata->td_flags.complete = 0;
1001 taskdata->td_flags.freed = 0;
1002
1003 taskdata->td_flags.native = flags->native;
1004
1005 taskdata->td_incomplete_child_tasks = 0;
1006 taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
1007#if OMP_40_ENABLED
1008 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
1009 taskdata->td_dephash = NULL;
1010 taskdata->td_depnode = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001011#endif
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001012
1013 // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
1014#if OMP_41_ENABLED
1015 if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1016#else
1017 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1018#endif
1019 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001020 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
1021#if OMP_40_ENABLED
1022 if ( parent_task->td_taskgroup )
1023 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
1024#endif
1025 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
1026 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
1027 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
1028 }
1029 }
1030
1031 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1032 gtid, taskdata, taskdata->td_parent) );
1033
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001034#if OMPT_SUPPORT
1035 if (ompt_status & ompt_status_track) {
1036 taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1037 taskdata->ompt_task_info.function = (void*) task_entry;
1038 taskdata->ompt_task_info.frame = (ompt_frame_t)
1039 { .exit_runtime_frame = NULL, .reenter_runtime_frame = NULL };
1040 }
1041#endif
1042
Jim Cownie5e8470a2013-09-27 10:38:44 +00001043 return task;
1044}
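
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): layout of the single
// allocation made by __kmp_task_alloc above. The diagram and the offset math only
// restate what the function computes; the helper below is hypothetical.

#if 0
/*
 *   taskdata --> +----------------------------+  offset 0
 *                | kmp_taskdata_t             |
 *   task ------> +----------------------------+  offset sizeof(kmp_taskdata_t)
 *                | kmp_task_t + private vars  |  (sizeof_kmp_task_t bytes)
 *                +----------------------------+
 *                | padding to pointer size    |
 *   shareds ---> +----------------------------+  offset = shareds_offset
 *                | array of shared var ptrs   |  (sizeof_shareds bytes, may be 0)
 *                +----------------------------+
 */
static void layout_sketch( kmp_taskdata_t *taskdata, size_t sizeof_kmp_task_t, size_t sizeof_shareds )
{
    size_t shareds_offset = __kmp_round_up_to_val( sizeof(kmp_taskdata_t) + sizeof_kmp_task_t,
                                                   sizeof(void *) );
    kmp_task_t *task    = KMP_TASKDATA_TO_TASK( taskdata );      /* the kmp_task_t follows the header */
    void       *shareds = (sizeof_shareds > 0) ? (void *)( (char *) taskdata + shareds_offset ) : NULL;
    (void) task; (void) shareds;
}
#endif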
1045
1046
1047kmp_task_t *
1048__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1049 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1050 kmp_routine_entry_t task_entry )
1051{
1052 kmp_task_t *retval;
1053 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1054
1055 input_flags->native = FALSE;
1056 // __kmp_task_alloc() sets up all other runtime flags
1057
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001058#if OMP_41_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001059 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001060 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1061 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001062 input_flags->proxy ? "proxy" : "",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001063 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001064#else
1065 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1066 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1067 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1068 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1069#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001070
1071 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1072 sizeof_shareds, task_entry );
1073
1074 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1075
1076 return retval;
1077}
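
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): __kmpc_omp_task_alloc
// receives the task flags packed by the compiler into a kmp_int32 and simply
// reinterprets that word as kmp_tasking_flags_t (the cast above). The bitfield
// below is hypothetical -- field names mirror ones used in this file, but the
// real layout lives in kmp.h.

#if 0
typedef struct sketch_tasking_flags {
    unsigned tiedness          : 1;     /* TASK_TIED / TASK_UNTIED          */
    unsigned final             : 1;     /* task created in a final context  */
    unsigned merged_if0        : 1;
    unsigned destructors_thunk : 1;
    unsigned proxy             : 1;     /* OMP 4.1 proxy task               */
    unsigned reserved          : 27;    /* remaining bits, layout differs   */
} sketch_tasking_flags_t;

static int sketch_is_tied( kmp_int32 flags_word )
{
    sketch_tasking_flags_t *f = (sketch_tasking_flags_t *) &flags_word;
    return f->tiedness;                 /* same trick as input_flags above  */
}
#endif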
1078
1079//-----------------------------------------------------------
1080// __kmp_invoke_task: invoke the specified task
1081//
1082// gtid: global thread ID of caller
1083// task: the task to invoke
// current_task: the task to resume after task invocation
1085
1086static void
1087__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1088{
1089 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jim Cownie181b4bb2013-12-23 17:28:57 +00001090#if OMP_40_ENABLED
1091 int discard = 0 /* false */;
1092#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001093 KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1094 gtid, taskdata, current_task) );
1095
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001096#if OMP_41_ENABLED
1097 if ( taskdata->td_flags.proxy == TASK_PROXY &&
1098 taskdata->td_flags.complete == 1)
1099 {
1100 // This is a proxy task that was already completed but it needs to run
1101 // its bottom-half finish
1102 KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1103 gtid, taskdata) );
1104
1105 __kmp_bottom_half_finish_proxy(gtid,task);
1106
1107 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1108
1109 return;
1110 }
1111#endif
1112
1113#if OMP_41_ENABLED
1114 // Proxy tasks are not handled by the runtime
1115 if ( taskdata->td_flags.proxy != TASK_PROXY )
1116#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001117 __kmp_task_start( gtid, task, current_task );
1118
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001119#if OMPT_SUPPORT
1120 ompt_thread_info_t oldInfo;
1121 kmp_info_t * thread;
1122 if (ompt_status & ompt_status_track) {
1123 // Store the threads states and restore them after the task
1124 thread = __kmp_threads[ gtid ];
1125 oldInfo = thread->th.ompt_thread_info;
1126 thread->th.ompt_thread_info.wait_id = 0;
1127 thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1128 taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1129 }
1130#endif
1131
Jim Cownie181b4bb2013-12-23 17:28:57 +00001132#if OMP_40_ENABLED
1133 // TODO: cancel tasks if the parallel region has also been cancelled
1134 // TODO: check if this sequence can be hoisted above __kmp_task_start
1135 // if cancellation has been enabled for this run ...
1136 if (__kmp_omp_cancellation) {
1137 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1138 kmp_team_t * this_team = this_thr->th.th_team;
1139 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1140 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1141 // this task belongs to a task group and we need to cancel it
1142 discard = 1 /* true */;
1143 }
1144 }
1145
Jim Cownie5e8470a2013-09-27 10:38:44 +00001146 //
1147 // Invoke the task routine and pass in relevant data.
1148 // Thunks generated by gcc take a different argument list.
1149 //
Jim Cownie181b4bb2013-12-23 17:28:57 +00001150 if (!discard) {
1151#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001152#ifdef KMP_GOMP_COMPAT
Jim Cownie181b4bb2013-12-23 17:28:57 +00001153 if (taskdata->td_flags.native) {
1154 ((void (*)(void *))(*(task->routine)))(task->shareds);
1155 }
1156 else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001157#endif /* KMP_GOMP_COMPAT */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001158 {
1159 (*(task->routine))(gtid, task);
1160 }
1161#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001162 }
Jim Cownie181b4bb2013-12-23 17:28:57 +00001163#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001164
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001165
1166#if OMPT_SUPPORT
1167 if (ompt_status & ompt_status_track) {
1168 thread->th.ompt_thread_info = oldInfo;
1169 taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1170 }
1171#endif
1172
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001173#if OMP_41_ENABLED
1174 // Proxy tasks are not handled by the runtime
1175 if ( taskdata->td_flags.proxy != TASK_PROXY )
1176#endif
1177 __kmp_task_finish( gtid, task, current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001178
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001179 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001180 gtid, taskdata, current_task) );
1181 return;
1182}
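
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): the two call forms in
// __kmp_invoke_task imply two entry-point shapes. The first matches
// (*(task->routine))(gtid, task); the second matches the KMP_GOMP_COMPAT branch,
// which passes only task->shareds. Names and the captured variable are hypothetical.

#if 0
/* Hypothetical shareds block the compiler would emit for a task capturing `int *x`. */
struct sketch_task_shareds { int *x; };

static kmp_int32 sketch_task_entry( kmp_int32 gtid, kmp_task_t *task )
{
    struct sketch_task_shareds *sh = (struct sketch_task_shareds *) task->shareds;
    *sh->x += 1;                        /* the task body */
    return 0;
}

static void sketch_gomp_task_thunk( void *shareds )
{
    struct sketch_task_shareds *sh = (struct sketch_task_shareds *) shareds;
    *sh->x += 1;
}
#endif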
1183
1184//-----------------------------------------------------------------------
1185// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1186//
1187// loc_ref: location of original task pragma (ignored)
1188// gtid: Global Thread ID of encountering thread
1189// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1190// Returns:
1191// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1192// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1193
1194kmp_int32
1195__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1196{
1197 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1198
1199 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1200 gtid, loc_ref, new_taskdata ) );
1201
1202 /* Should we execute the new task or queue it? For now, let's just always try to
1203 queue it. If the queue fills up, then we'll execute it. */
1204
1205 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1206 { // Execute this task immediately
1207 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1208 new_taskdata->td_flags.task_serial = 1;
1209 __kmp_invoke_task( gtid, new_task, current_task );
1210 }
1211
1212 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1213 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1214 new_taskdata ) );
1215
1216 return TASK_CURRENT_NOT_QUEUED;
1217}
1218
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001219//---------------------------------------------------------------------
1220// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1221// gtid: Global Thread ID of encountering thread
1222// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1223// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1224// returns:
1225//
1226// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1227// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1228kmp_int32
1229__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1230{
1231 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1232
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001233#if OMPT_SUPPORT
1234 if (ompt_status & ompt_status_track) {
1235 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1236 __builtin_frame_address(0);
1237 }
1238#endif
1239
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001240 /* Should we execute the new task or queue it? For now, let's just always try to
1241 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001242#if OMP_41_ENABLED
1243 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1244#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001245 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001246#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247 { // Execute this task immediately
1248 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1249 if ( serialize_immediate )
1250 new_taskdata -> td_flags.task_serial = 1;
1251 __kmp_invoke_task( gtid, new_task, current_task );
1252 }
1253
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001254#if OMPT_SUPPORT
1255 if (ompt_status & ompt_status_track) {
1256 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1257 }
1258#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001259
1260 return TASK_CURRENT_NOT_QUEUED;
1261}
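
//---------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime build): user-level code that
// typically reaches __kmp_omp_task. A compiler commonly lowers "#pragma omp task"
// into __kmpc_omp_task_alloc followed by __kmpc_omp_task at the creation point;
// the exact lowering is compiler-specific.

#if 0
#include <omp.h>
#include <stdio.h>

int main( void )
{
    #pragma omp parallel
    #pragma omp single
    {
        int i;
        for ( i = 0; i < 8; i++ ) {
            #pragma omp task firstprivate(i)   /* becomes task_alloc + task at this call site */
            printf( "task %d run by thread %d\n", i, omp_get_thread_num() );
        }
    }                                          /* the implicit barrier also drains the task deques */
    return 0;
}
#endif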
Jim Cownie5e8470a2013-09-27 10:38:44 +00001262
1263//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001264// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1265// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001266// loc_ref: location of original task pragma (ignored)
1267// gtid: Global Thread ID of encountering thread
1268// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1269// returns:
1270//
1271// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1272// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1273
1274kmp_int32
1275__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1276{
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001277 kmp_taskdata_t * new_taskdata;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001278 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001279
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001280 new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001281 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1282 gtid, loc_ref, new_taskdata ) );
1283
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001284 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001285
1286 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1287 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001288 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001289}
1290
Jim Cownie5e8470a2013-09-27 10:38:44 +00001291//-------------------------------------------------------------------------------------
1292// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1293
1294kmp_int32
1295__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1296{
1297 kmp_taskdata_t * taskdata;
1298 kmp_info_t * thread;
1299 int thread_finished = FALSE;
1300
1301 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1302 gtid, loc_ref) );
1303
1304 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1305 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1306
1307 thread = __kmp_threads[ gtid ];
1308 taskdata = thread -> th.th_current_task;
1309#if USE_ITT_BUILD
1310 // Note: These values are used by ITT events as well.
1311#endif /* USE_ITT_BUILD */
1312 taskdata->td_taskwait_counter += 1;
1313 taskdata->td_taskwait_ident = loc_ref;
1314 taskdata->td_taskwait_thread = gtid + 1;
1315
1316#if USE_ITT_BUILD
1317 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1318 if ( itt_sync_obj != NULL )
1319 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1320#endif /* USE_ITT_BUILD */
1321
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001322#if OMP_41_ENABLED
1323 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1324#else
1325 if ( ! taskdata->td_flags.team_serial )
1326#endif
1327 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001328 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001329 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001330 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001331 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1332 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001333 }
1334 }
1335#if USE_ITT_BUILD
1336 if ( itt_sync_obj != NULL )
1337 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1338#endif /* USE_ITT_BUILD */
1339
1340 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1341 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1342 }
1343
1344 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1345 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1346
1347 return TASK_CURRENT_NOT_QUEUED;
1348}
1349
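// Illustrative usage (not part of the runtime): "#pragma omp taskwait" lowers to a
// single call to the entry point above, roughly:
//
//   __kmpc_omp_taskwait( &loc, __kmpc_global_thread_num( &loc ) );
//
// When the team is not serialized, the encountering thread keeps executing tasks
// until its td_incomplete_child_tasks count drops to zero.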
1350
1351//-------------------------------------------------
1352// __kmpc_omp_taskyield: switch to a different task
1353
1354kmp_int32
1355__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1356{
1357 kmp_taskdata_t * taskdata;
1358 kmp_info_t * thread;
1359 int thread_finished = FALSE;
1360
1361 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1362 gtid, loc_ref, end_part) );
1363
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001364 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001365 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1366
1367 thread = __kmp_threads[ gtid ];
1368 taskdata = thread -> th.th_current_task;
1369 // Should we model this as a task wait or not?
1370#if USE_ITT_BUILD
1371 // Note: These values are used by ITT events as well.
1372#endif /* USE_ITT_BUILD */
1373 taskdata->td_taskwait_counter += 1;
1374 taskdata->td_taskwait_ident = loc_ref;
1375 taskdata->td_taskwait_thread = gtid + 1;
1376
1377#if USE_ITT_BUILD
1378 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1379 if ( itt_sync_obj != NULL )
1380 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1381#endif /* USE_ITT_BUILD */
1382 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001383 kmp_task_team_t * task_team = thread->th.th_task_team;
1384 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001385 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001386 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1387 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1388 }
1389 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001391#if USE_ITT_BUILD
1392 if ( itt_sync_obj != NULL )
1393 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1394#endif /* USE_ITT_BUILD */
1395
1396 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1397 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1398 }
1399
1400 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1401 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1402
1403 return TASK_CURRENT_NOT_QUEUED;
1404}
1405
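// Illustrative usage (not part of the runtime): "#pragma omp taskyield" lowers to a
// call like the one below; in this entry point end_part only appears in the trace
// output, and the value 0 is shown purely as a placeholder:
//
//   __kmpc_omp_taskyield( &loc, gtid, 0 );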
1406
1407#if OMP_40_ENABLED
1408//-------------------------------------------------------------------------------------
1409// __kmpc_taskgroup: Start a new taskgroup
1410
1411void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001412__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001413{
1414 kmp_info_t * thread = __kmp_threads[ gtid ];
1415 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1416 kmp_taskgroup_t * tg_new =
1417 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1418 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1419 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001420 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421 tg_new->parent = taskdata->td_taskgroup;
1422 taskdata->td_taskgroup = tg_new;
1423}
1424
1425
1426//-------------------------------------------------------------------------------------
1427// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1428// and its descendants are complete
1429
1430void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001431__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001432{
1433 kmp_info_t * thread = __kmp_threads[ gtid ];
1434 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1435 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1436 int thread_finished = FALSE;
1437
1438 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1439 KMP_DEBUG_ASSERT( taskgroup != NULL );
1440
1441 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1442#if USE_ITT_BUILD
1443 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1444 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1445 if ( itt_sync_obj != NULL )
1446 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1447#endif /* USE_ITT_BUILD */
1448
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001449#if OMP_41_ENABLED
1450 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1451#else
1452 if ( ! taskdata->td_flags.team_serial )
1453#endif
1454 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001455 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001456 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001457 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1458 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001459 }
1460 }
1461
1462#if USE_ITT_BUILD
1463 if ( itt_sync_obj != NULL )
1464 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1465#endif /* USE_ITT_BUILD */
1466 }
1467 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1468
1469 // Restore parent taskgroup for the current task
1470 taskdata->td_taskgroup = taskgroup->parent;
1471 __kmp_thread_free( thread, taskgroup );
1472
1473 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1474}
1475#endif
1476
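// Illustrative usage (not part of the runtime): with OMP_40_ENABLED, a
// "#pragma omp taskgroup" region is bracketed by the two entry points above,
// roughly:
//
//   __kmpc_taskgroup( &loc, gtid );
//   /* ... region body; tasks registered with the group bump taskgroup->count ... */
//   __kmpc_end_taskgroup( &loc, gtid ); // waits until the group's count drops to 0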
1477
1478//------------------------------------------------------
1479// __kmp_remove_my_task: remove a task from my own deque
1480
1481static kmp_task_t *
1482__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1483 kmp_int32 is_constrained )
1484{
1485 kmp_task_t * task;
1486 kmp_taskdata_t * taskdata;
1487 kmp_thread_data_t *thread_data;
1488 kmp_uint32 tail;
1489
1490 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1491 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1492
1493 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1494
1495 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1496 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1497 thread_data->td.td_deque_tail) );
1498
1499 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1500 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1501 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1502 thread_data->td.td_deque_tail) );
1503 return NULL;
1504 }
1505
1506 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1507
1508 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1509 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1510 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1511 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1512 thread_data->td.td_deque_tail) );
1513 return NULL;
1514 }
1515
1516 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1517 taskdata = thread_data -> td.td_deque[ tail ];
1518
1519 if (is_constrained) {
1520 // we need to check if the candidate obeys task scheduling constraint:
1521 // only child of current task can be scheduled
1522 kmp_taskdata_t * current = thread->th.th_current_task;
1523 kmp_int32 level = current->td_level;
1524 kmp_taskdata_t * parent = taskdata->td_parent;
1525 while ( parent != current && parent->td_level > level ) {
1526 parent = parent->td_parent; // check generation up to the level of the current task
1527 KMP_DEBUG_ASSERT(parent != NULL);
1528 }
1529 if ( parent != current ) {
1530            // If the tail task is not a child, then no other children can appear in the deque.
1531 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1532 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1533 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1534 thread_data->td.td_deque_tail) );
1535 return NULL;
1536 }
1537 }
1538
1539 thread_data -> td.td_deque_tail = tail;
1540 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1541
1542 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1543
1544 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1545 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1546 thread_data->td.td_deque_tail) );
1547
1548 task = KMP_TASKDATA_TO_TASK( taskdata );
1549 return task;
1550}
1551
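// Worked example of the tail arithmetic above (the concrete size is illustrative;
// the real values come from TASK_DEQUE_SIZE / TASK_DEQUE_MASK): with a 256-slot
// deque the mask is 0xff, so when td_deque_tail is 0 the pop index wraps around:
//
//   tail = ( 0 - 1 ) & 0xff;   // == 255, the last slot of the ring buffer
//
// This is also why td_deque_ntasks, not the head/tail distance, is checked (once
// before and once after taking the lock) to decide whether the deque is empty.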
1552
1553//-----------------------------------------------------------
1554// __kmp_steal_task: remove a task from another thread's deque
1555// Assume that calling thread has already checked existence of
1556// task_team thread_data before calling this routine.
1557
1558static kmp_task_t *
1559__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1560 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1561 kmp_int32 is_constrained )
1562{
1563 kmp_task_t * task;
1564 kmp_taskdata_t * taskdata;
1565 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001566 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001567
1568 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1569
1570 threads_data = task_team -> tt.tt_threads_data;
1571 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1572
1573 victim_tid = victim->th.th_info.ds.ds_tid;
1574 victim_td = & threads_data[ victim_tid ];
1575
1576 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1577 "head=%u tail=%u\n",
1578 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1579 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1580
1581 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1582 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1583 {
1584 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1585 "ntasks=%d head=%u tail=%u\n",
1586 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1587 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1588 return NULL;
1589 }
1590
1591 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1592
1593 // Check again after we acquire the lock
1594 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1595 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1596 {
1597 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1598 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1599 "ntasks=%d head=%u tail=%u\n",
1600 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1601 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1602 return NULL;
1603 }
1604
1605 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1606
1607 if ( !is_constrained ) {
1608 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1609 // Bump head pointer and Wrap.
1610 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1611 } else {
1612 // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1613 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1614 taskdata = victim_td -> td.td_deque[ tail ];
1615 // we need to check if the candidate obeys task scheduling constraint:
1616 // only child of current task can be scheduled
1617 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1618 kmp_int32 level = current->td_level;
1619 kmp_taskdata_t * parent = taskdata->td_parent;
1620 while ( parent != current && parent->td_level > level ) {
1621 parent = parent->td_parent; // check generation up to the level of the current task
1622 KMP_DEBUG_ASSERT(parent != NULL);
1623 }
1624 if ( parent != current ) {
1625            // If the tail task is not a child, then no other children can appear in the deque (?).
1626 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1627 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1628 "ntasks=%d head=%u tail=%u\n",
1629 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1630 task_team, victim_td->td.td_deque_ntasks,
1631 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1632 return NULL;
1633 }
1634 victim_td -> td.td_deque_tail = tail;
1635 }
1636 if (*thread_finished) {
1637 // We need to un-mark this victim as a finished victim. This must be done before
1638 // releasing the lock, or else other threads (starting with the master victim)
1639 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001640 kmp_uint32 count;
1641
1642 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643
1644 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1645 gtid, count + 1, task_team) );
1646
1647 *thread_finished = FALSE;
1648 }
1649 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1650
1651 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1652
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001653 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001654 "ntasks=%d head=%u tail=%u\n",
1655 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1656 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1657 victim_td->td.td_deque_tail) );
1658
1659 task = KMP_TASKDATA_TO_TASK( taskdata );
1660 return task;
1661}
1662
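// Note on the is_constrained path shared by __kmp_remove_my_task() and
// __kmp_steal_task() above: starting at the candidate's td_parent, the walk follows
// td_parent links while the ancestor's td_level is greater than the current task's
// td_level. If the walk ends on the current task, the candidate is a descendant and
// may be scheduled; if it ends anywhere else, scheduling it would violate the task
// scheduling constraint, so the candidate is left in the deque.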
1663
1664//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001665// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001666// is satisfied (return true) or there are none left (return false).
1667// final_spin is TRUE if this is the spin at the release barrier.
1668// thread_finished indicates whether the thread is finished executing all
1669// the tasks it has on its deque, and is at the release barrier.
1670// spinner is the location on which to spin.
1671// spinner == NULL means only execute a single task and return.
1672// checker is the value to check to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001673template <class C>
1674static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1675 int *thread_finished
1676 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001677{
1678 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001679 kmp_thread_data_t * threads_data;
1680 kmp_task_t * task;
1681 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1682 volatile kmp_uint32 * unfinished_threads;
1683 kmp_int32 nthreads, last_stolen, k, tid;
1684
1685 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1686 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1687
1688 task_team = thread -> th.th_task_team;
1689 KMP_DEBUG_ASSERT( task_team != NULL );
1690
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001691 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001692 gtid, final_spin, *thread_finished) );
1693
1694 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1695 KMP_DEBUG_ASSERT( threads_data != NULL );
1696
1697 nthreads = task_team -> tt.tt_nproc;
1698 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001699#if OMP_41_ENABLED
1700 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1701#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001703#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001704 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1705
1706 // Choose tasks from our own work queue.
1707 start:
1708 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1709#if USE_ITT_BUILD && USE_ITT_NOTIFY
1710 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1711 if ( itt_sync_obj == NULL ) {
1712 // we are at fork barrier where we could not get the object reliably
1713 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1714 }
1715 __kmp_itt_task_starting( itt_sync_obj );
1716 }
1717#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1718 __kmp_invoke_task( gtid, task, current_task );
1719#if USE_ITT_BUILD
1720 if ( itt_sync_obj != NULL )
1721 __kmp_itt_task_finished( itt_sync_obj );
1722#endif /* USE_ITT_BUILD */
1723
1724 // If this thread is only partway through the barrier and the condition
1725 // is met, then return now, so that the barrier gather/release pattern can proceed.
1726 // If this thread is in the last spin loop in the barrier, waiting to be
1727        // released, we know that the termination condition will not be satisfied,
1728 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001729 if (flag == NULL || (!final_spin && flag->done_check())) {
1730 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001731 return TRUE;
1732 }
1733 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1734 }
1735
1736 // This thread's work queue is empty. If we are in the final spin loop
1737 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001738#if OMP_41_ENABLED
1739 // The work queue may be empty but there might be proxy tasks still executing
1740 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1741#else
1742 if (final_spin)
1743#endif
1744 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745 // First, decrement the #unfinished threads, if that has not already
1746 // been done. This decrement might be to the spin location, and
1747 // result in the termination condition being satisfied.
1748 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001749 kmp_uint32 count;
1750
1751 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001752 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001753 gtid, count, task_team) );
1754 *thread_finished = TRUE;
1755 }
1756
1757 // It is now unsafe to reference thread->th.th_team !!!
1758 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1759 // thread to pass through the barrier, where it might reset each thread's
1760 // th.th_team field for the next parallel region.
1761 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001762 if (flag != NULL && flag->done_check()) {
1763 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001764 return TRUE;
1765 }
1766 }
1767
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001768#if OMP_41_ENABLED
1769 // check if there are other threads to steal from, otherwise go back
1770 if ( nthreads == 1 )
1771 goto start;
1772#endif
1773
Jim Cownie5e8470a2013-09-27 10:38:44 +00001774 // Try to steal from the last place I stole from successfully.
1775 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1776 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1777
1778 if (last_stolen != -1) {
1779 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1780
1781 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1782 thread_finished, is_constrained )) != NULL)
1783 {
1784#if USE_ITT_BUILD && USE_ITT_NOTIFY
1785 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1786 if ( itt_sync_obj == NULL ) {
1787 // we are at fork barrier where we could not get the object reliably
1788 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1789 }
1790 __kmp_itt_task_starting( itt_sync_obj );
1791 }
1792#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1793 __kmp_invoke_task( gtid, task, current_task );
1794#if USE_ITT_BUILD
1795 if ( itt_sync_obj != NULL )
1796 __kmp_itt_task_finished( itt_sync_obj );
1797#endif /* USE_ITT_BUILD */
1798
1799 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001800 if (flag == NULL || (!final_spin && flag->done_check())) {
1801 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001802 gtid) );
1803 return TRUE;
1804 }
1805
1806 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1807 // If the execution of the stolen task resulted in more tasks being
1808 // placed on our run queue, then restart the whole process.
1809 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001810 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001811 gtid) );
1812 goto start;
1813 }
1814 }
1815
1816 // Don't give priority to stealing from this thread anymore.
1817 threads_data[ tid ].td.td_deque_last_stolen = -1;
1818
1819        // The victim's work queue is empty. If we are in the final spin loop
1820 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001821#if OMP_41_ENABLED
1822 // The work queue may be empty but there might be proxy tasks still executing
1823 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1824#else
1825 if (final_spin)
1826#endif
1827 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828 // First, decrement the #unfinished threads, if that has not already
1829 // been done. This decrement might be to the spin location, and
1830 // result in the termination condition being satisfied.
1831 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001832 kmp_uint32 count;
1833
1834 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001835 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001836 "task_team=%p\n", gtid, count, task_team) );
1837 *thread_finished = TRUE;
1838 }
1839
1840 // If __kmp_tasking_mode != tskm_immediate_exec
1841 // then it is now unsafe to reference thread->th.th_team !!!
1842 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1843 // thread to pass through the barrier, where it might reset each thread's
1844 // th.th_team field for the next parallel region.
1845 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001846 if (flag != NULL && flag->done_check()) {
1847 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001848 gtid) );
1849 return TRUE;
1850 }
1851 }
1852 }
1853
1854 // Find a different thread to steal work from. Pick a random thread.
1855 // My initial plan was to cycle through all the threads, and only return
1856 // if we tried to steal from every thread, and failed. Arch says that's
1857 // not such a great idea.
1858 // GEH - need yield code in this loop for throughput library mode?
1859 new_victim:
1860 k = __kmp_get_random( thread ) % (nthreads - 1);
1861 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1862 ++k; // Adjusts random distribution to exclude self
1863 }
1864 {
1865 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1866 int first;
1867
1868 // There is a slight chance that __kmp_enable_tasking() did not wake up
1869        // all threads waiting at the barrier. If this thread is sleeping,
1870        // then wake it up. Since we were going to pay the cache miss penalty
1871        // for referencing another thread's kmp_info_t struct anyway, the check
1872 // shouldn't cost too much performance at this point.
1873 // In extra barrier mode, tasks do not sleep at the separate tasking
1874 // barrier, so this isn't a problem.
1875 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1876 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1877 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1878 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001879 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001880            // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001881 // There is a slight possibility that it resumes, steals a task from
Jim Cownie5e8470a2013-09-27 10:38:44 +00001882            // another thread, which spawns more tasks, all in the time that it takes
1883 // this thread to check => don't write an assertion that the victim's
1884 // queue is empty. Try stealing from a different thread.
1885 goto new_victim;
1886 }
1887
1888 // Now try to steal work from the selected thread
1889 first = TRUE;
1890 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1891 thread_finished, is_constrained )) != NULL)
1892 {
1893#if USE_ITT_BUILD && USE_ITT_NOTIFY
1894 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1895 if ( itt_sync_obj == NULL ) {
1896 // we are at fork barrier where we could not get the object reliably
1897 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1898 }
1899 __kmp_itt_task_starting( itt_sync_obj );
1900 }
1901#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1902 __kmp_invoke_task( gtid, task, current_task );
1903#if USE_ITT_BUILD
1904 if ( itt_sync_obj != NULL )
1905 __kmp_itt_task_finished( itt_sync_obj );
1906#endif /* USE_ITT_BUILD */
1907
1908 // Try stealing from this victim again, in the future.
1909 if (first) {
1910 threads_data[ tid ].td.td_deque_last_stolen = k;
1911 first = FALSE;
1912 }
1913
1914 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001915 if (flag == NULL || (!final_spin && flag->done_check())) {
1916 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917 gtid) );
1918 return TRUE;
1919 }
1920 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1921
1922 // If the execution of the stolen task resulted in more tasks being
1923 // placed on our run queue, then restart the whole process.
1924 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001925 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001926 gtid) );
1927 goto start;
1928 }
1929 }
1930
1931        // The victim's work queue is empty. If we are in the final spin loop
1932 // of the barrier, check and see if the termination condition is satisfied.
1933 // Going on and finding a new victim to steal from is expensive, as it
1934 // involves a lot of cache misses, so we definitely want to re-check the
1935 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001936#if OMP_41_ENABLED
1937 // The work queue may be empty but there might be proxy tasks still executing
1938 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1939#else
1940 if (final_spin)
1941#endif
1942 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 // First, decrement the #unfinished threads, if that has not already
1944 // been done. This decrement might be to the spin location, and
1945 // result in the termination condition being satisfied.
1946 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001947 kmp_uint32 count;
1948
1949 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001950 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951 "task_team=%p\n",
1952 gtid, count, task_team) );
1953 *thread_finished = TRUE;
1954 }
1955
1956 // If __kmp_tasking_mode != tskm_immediate_exec,
1957 // then it is now unsafe to reference thread->th.th_team !!!
1958 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1959 // thread to pass through the barrier, where it might reset each thread's
1960 // th.th_team field for the next parallel region.
1961 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001962 if (flag != NULL && flag->done_check()) {
1963 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 return TRUE;
1965 }
1966 }
1967 }
1968
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001969 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970 return FALSE;
1971}
1972
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001973int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1974 int *thread_finished
1975 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1976{
1977 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1978 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1979}
1980
1981int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1982 int *thread_finished
1983 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1984{
1985 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1986 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1987}
1988
1989int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1990 int *thread_finished
1991 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1992{
1993 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1994 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1995}
1996
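// The three wrappers above give each concrete flag type (kmp_flag_32, kmp_flag_64,
// kmp_flag_oncore) its own instantiation of __kmp_execute_tasks_template(), so
// flag->done_check() can be resolved at compile time for each flag width instead of
// going through a run-time dispatch.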
1997
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998
1999//-----------------------------------------------------------------------------
2000// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2001// next barrier so they can assist in executing enqueued tasks.
2002// First thread in allocates the task team atomically.
2003
2004static void
2005__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2006{
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002007 kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008 kmp_thread_data_t *threads_data;
2009 int nthreads, i, is_init_thread;
2010
2011 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2012 __kmp_gtid_from_thread( this_thr ) ) );
2013
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002014 team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015 KMP_DEBUG_ASSERT(task_team != NULL);
2016 KMP_DEBUG_ASSERT(team != NULL);
2017
2018 nthreads = task_team->tt.tt_nproc;
2019 KMP_DEBUG_ASSERT(nthreads > 0);
2020 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
2021
2022 // Allocate or increase the size of threads_data if necessary
2023 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2024
2025 if (!is_init_thread) {
2026 // Some other thread already set up the array.
2027 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2028 __kmp_gtid_from_thread( this_thr ) ) );
2029 return;
2030 }
2031 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2032 KMP_DEBUG_ASSERT( threads_data != NULL );
2033
2034 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2035 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2036 {
2037 // Release any threads sleeping at the barrier, so that they can steal
2038 // tasks and execute them. In extra barrier mode, tasks do not sleep
2039 // at the separate tasking barrier, so this isn't a problem.
2040 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002041 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042 kmp_info_t *thread = threads_data[i].td.td_thr;
2043
2044 if (i == this_thr->th.th_info.ds.ds_tid) {
2045 continue;
2046 }
2047 // Since we haven't locked the thread's suspend mutex lock at this
2048 // point, there is a small window where a thread might be putting
2049 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002050 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002051            // to see if other threads are sleeping (using the same random
2052 // mechanism that is used for task stealing) and awakens them if
2053 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002054 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002055 {
2056 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2057 __kmp_gtid_from_thread( this_thr ),
2058 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002059 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060 }
2061 else {
2062 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2063 __kmp_gtid_from_thread( this_thr ),
2064 __kmp_gtid_from_thread( thread ) ) );
2065 }
2066 }
2067 }
2068
2069 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2070 __kmp_gtid_from_thread( this_thr ) ) );
2071}
2072
2073
2074/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002075/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002076 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2077 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2078 * After a child thread checks into a barrier and calls __kmp_release() from
2079 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2080 * longer assume that the kmp_team_t structure is intact (at any moment, the
2081 * master thread may exit the barrier code and free the team data structure,
2082 * and return the threads to the thread pool).
2083 *
2084 * This does not work with the the tasking code, as the thread is still
2085 * expected to participate in the execution of any tasks that may have been
2086 * spawned by a member of the team, and the thread still needs access
2087 * to each thread in the team, so that it can steal work from it.
2088 *
2089 * Enter the existence of the kmp_task_team_t struct. It employs a reference
2090 * counting mechanism, and is allocated by the master thread before calling
2091 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2092 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2093 * of the kmp_task_team_t structs for consecutive barriers can overlap
2094 * (and will, unless the master thread is the last thread to exit the barrier
2095 * release phase, which is not typical).
2096 *
2097 * The existence of such a struct is useful outside the context of tasking,
2098 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2099 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2100 * libraries.
2101 *
2102 * We currently use the existence of the threads array as an indicator that
2103 * tasks were spawned since the last barrier. If the structure is to be
2104 * useful outside the context of tasking, then this will have to change, but
2105 * not setting the field minimizes the performance impact of tasking on
2106 * barriers, when no explicit tasks were spawned (pushed, actually).
2107 */
2108
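// Rough sketch of the lifecycle described above, in terms of routines in this file
// (the ordering is illustrative, not a complete call graph):
//
//   master:      __kmp_task_team_setup() -> __kmp_allocate_task_team()
//   each thread: __kmp_task_team_sync()   // pick up t_task_team[..] after release
//   all:         push / steal / execute tasks via the routines earlier in this file
//   master:      __kmp_task_team_wait()   // drain remaining tasks at the gather phase
//   each thread: __kmp_unref_task_team()  // last reference frees the struct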
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002109
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2111// Lock for task team data structures
2112static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2113
2114
2115//------------------------------------------------------------------------------
2116// __kmp_alloc_task_deque:
2117// Allocates a task deque for a particular thread, and initialize the necessary
2118// data structures relating to the deque. This only happens once per thread
2119// per task team since task teams are recycled.
2120// No lock is needed during allocation since each thread allocates its own
2121// deque.
2122
2123static void
2124__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2125{
2126 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2127 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2128
2129 // Initialize last stolen task field to "none"
2130 thread_data -> td.td_deque_last_stolen = -1;
2131
2132 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2133 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2134 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2135
2136 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2137 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2138 // Allocate space for task deque, and zero the deque
2139 // Cannot use __kmp_thread_calloc() because threads not around for
2140 // kmp_reap_task_team( ).
2141 thread_data -> td.td_deque = (kmp_taskdata_t **)
2142 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2143}
2144
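// A minimal sketch of the state left behind by the allocation above, assuming the
// same TASK_DEQUE_SIZE used there:
//
//   td_deque_head == td_deque_tail == 0, td_deque_ntasks == 0
//   td_deque[0 .. TASK_DEQUE_SIZE-1] zero-filled (no kmp_taskdata_t pointers yet)
//
// Pushes advance td_deque_tail, the owning thread pops from the tail, and thieves
// take from the head, all wrapped with TASK_DEQUE_MASK.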
2145
2146//------------------------------------------------------------------------------
2147// __kmp_free_task_deque:
2148// Deallocates a task deque for a particular thread.
2149// Happens at library deallocation so don't need to reset all thread data fields.
2150
2151static void
2152__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2153{
2154 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2155
2156 if ( thread_data -> td.td_deque != NULL ) {
2157 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2158 __kmp_free( thread_data -> td.td_deque );
2159 thread_data -> td.td_deque = NULL;
2160 }
2161 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2162
2163#ifdef BUILD_TIED_TASK_STACK
2164 // GEH: Figure out what to do here for td_susp_tied_tasks
2165 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2166 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2167 }
2168#endif // BUILD_TIED_TASK_STACK
2169}
2170
2171
2172//------------------------------------------------------------------------------
2173// __kmp_realloc_task_threads_data:
2174// Allocates a threads_data array for a task team, either by allocating an initial
2175// array or enlarging an existing array. Only the first thread to get the lock
2176// allocs or enlarges the array and re-initializes the array elements.
2177// That thread returns "TRUE", the rest return "FALSE".
2178// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2179// The current size is given by task_team -> tt.tt_max_threads.
2180
2181static int
2182__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2183{
2184 kmp_thread_data_t ** threads_data_p;
2185 kmp_int32 nthreads, maxthreads;
2186 int is_init_thread = FALSE;
2187
2188 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2189 // Already reallocated and initialized.
2190 return FALSE;
2191 }
2192
2193 threads_data_p = & task_team -> tt.tt_threads_data;
2194 nthreads = task_team -> tt.tt_nproc;
2195 maxthreads = task_team -> tt.tt_max_threads;
2196
2197 // All threads must lock when they encounter the first task of the implicit task
2198 // region to make sure threads_data fields are (re)initialized before used.
2199 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2200
2201 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2202 // first thread to enable tasking
2203 kmp_team_t *team = thread -> th.th_team;
2204 int i;
2205
2206 is_init_thread = TRUE;
2207 if ( maxthreads < nthreads ) {
2208
2209 if ( *threads_data_p != NULL ) {
2210 kmp_thread_data_t *old_data = *threads_data_p;
2211 kmp_thread_data_t *new_data = NULL;
2212
2213 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2214 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2215 __kmp_gtid_from_thread( thread ), task_team,
2216 nthreads, maxthreads ) );
2217 // Reallocate threads_data to have more elements than current array
2218 // Cannot use __kmp_thread_realloc() because threads not around for
2219 // kmp_reap_task_team( ). Note all new array entries are initialized
2220 // to zero by __kmp_allocate().
2221 new_data = (kmp_thread_data_t *)
2222 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2223 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002224 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002225 (void *) old_data,
2226 maxthreads * sizeof(kmp_taskdata_t *) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002227
2228#ifdef BUILD_TIED_TASK_STACK
2229 // GEH: Figure out if this is the right thing to do
2230 for (i = maxthreads; i < nthreads; i++) {
2231 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2232 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2233 }
2234#endif // BUILD_TIED_TASK_STACK
2235 // Install the new data and free the old data
2236 (*threads_data_p) = new_data;
2237 __kmp_free( old_data );
2238 }
2239 else {
2240 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2241 "threads data for task_team %p, size = %d\n",
2242 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2243 // Make the initial allocate for threads_data array, and zero entries
2244 // Cannot use __kmp_thread_calloc() because threads not around for
2245 // kmp_reap_task_team( ).
2246 *threads_data_p = (kmp_thread_data_t *)
2247 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2248#ifdef BUILD_TIED_TASK_STACK
2249 // GEH: Figure out if this is the right thing to do
2250 for (i = 0; i < nthreads; i++) {
2251 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2252 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2253 }
2254#endif // BUILD_TIED_TASK_STACK
2255 }
2256 task_team -> tt.tt_max_threads = nthreads;
2257 }
2258 else {
2259 // If array has (more than) enough elements, go ahead and use it
2260 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2261 }
2262
2263 // initialize threads_data pointers back to thread_info structures
2264 for (i = 0; i < nthreads; i++) {
2265 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2266 thread_data -> td.td_thr = team -> t.t_threads[i];
2267
2268 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2269 // The last stolen field survives across teams / barrier, and the number
2270 // of threads may have changed. It's possible (likely?) that a new
2271 // parallel region will exhibit the same behavior as the previous region.
2272 thread_data -> td.td_deque_last_stolen = -1;
2273 }
2274 }
2275
2276 KMP_MB();
2277 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2278 }
2279
2280 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2281 return is_init_thread;
2282}
2283
2284
2285//------------------------------------------------------------------------------
2286// __kmp_free_task_threads_data:
2287// Deallocates a threads_data array for a task team, including any attached
2288// tasking deques. Only occurs at library shutdown.
2289
2290static void
2291__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2292{
2293 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2294 if ( task_team -> tt.tt_threads_data != NULL ) {
2295 int i;
2296 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2297 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2298 }
2299 __kmp_free( task_team -> tt.tt_threads_data );
2300 task_team -> tt.tt_threads_data = NULL;
2301 }
2302 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2303}
2304
2305
2306//------------------------------------------------------------------------------
2307// __kmp_allocate_task_team:
2308// Allocates a task team associated with a specific team, taking it from
2309// the global task team free list if possible. Also initializes data structures.
2310
2311static kmp_task_team_t *
2312__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2313{
2314 kmp_task_team_t *task_team = NULL;
2315 int nthreads;
2316
2317 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2318 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2319
2320 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2321 // Take a task team from the task team pool
2322 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2323 if (__kmp_free_task_teams != NULL) {
2324 task_team = __kmp_free_task_teams;
2325 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2326 task_team -> tt.tt_next = NULL;
2327 }
2328 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2329 }
2330
2331 if (task_team == NULL) {
2332 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2333 "task team for team %p\n",
2334 __kmp_gtid_from_thread( thread ), team ) );
2335 // Allocate a new task team if one is not available.
2336 // Cannot use __kmp_thread_malloc() because threads not around for
2337 // kmp_reap_task_team( ).
2338 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2339 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2340 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2341 //task_team -> tt.tt_max_threads = 0;
2342 //task_team -> tt.tt_next = NULL;
2343 }
2344
2345 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002346#if OMP_41_ENABLED
2347 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2348#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002349 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2350
Jim Cownie5e8470a2013-09-27 10:38:44 +00002351 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2352 TCW_4( task_team -> tt.tt_active, TRUE );
2353 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2354
2355 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2356 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2357 return task_team;
2358}
2359
2360
2361//------------------------------------------------------------------------------
2362// __kmp_free_task_team:
2363// Frees the task team associated with a specific thread, and adds it
2364// to the global task team free list.
2365//
2366
2367static void
2368__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2369{
2370 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2371 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2372
2373 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2374
2375 // Put task team back on free list
2376 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2377
2378 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2379 task_team -> tt.tt_next = __kmp_free_task_teams;
2380 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2381 TCW_PTR(__kmp_free_task_teams, task_team);
2382
2383 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2384}
2385
2386
2387//------------------------------------------------------------------------------
2388// __kmp_reap_task_teams:
2389// Free all the task teams on the task team free list.
2390// Should only be done during library shutdown.
2391// Cannot do anything that needs a thread structure or gtid since they are already gone.
2392
2393void
2394__kmp_reap_task_teams( void )
2395{
2396 kmp_task_team_t *task_team;
2397
2398 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2399 // Free all task_teams on the free list
2400 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2401 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2402 __kmp_free_task_teams = task_team -> tt.tt_next;
2403 task_team -> tt.tt_next = NULL;
2404
2405 // Free threads_data if necessary
2406 if ( task_team -> tt.tt_threads_data != NULL ) {
2407 __kmp_free_task_threads_data( task_team );
2408 }
2409 __kmp_free( task_team );
2410 }
2411 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2412 }
2413}
2414
2415
2416//------------------------------------------------------------------------------
2417// __kmp_unref_task_team:
2418// Remove one thread from referencing the task team structure by
2419// decreasing the reference count, and deallocate the task team if there
2420// are no more references to it.
2421//
2422void
2423__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2424{
2425 kmp_uint ref_ct;
2426
2427 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2428
2429 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2430 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2431
2432
2433 if ( ref_ct == 0 ) {
2434 __kmp_free_task_team( thread, task_team );
2435 }
2436
2437 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2438}
2439
2440
2441//------------------------------------------------------------------------------
2442// __kmp_wait_to_unref_task_teams:
2443// Some threads could still be in the fork barrier release code, possibly
2444// trying to steal tasks. Wait for each thread to unreference its task team.
2445//
2446void
2447__kmp_wait_to_unref_task_teams(void)
2448{
2449 kmp_info_t *thread;
2450 kmp_uint32 spins;
2451 int done;
2452
2453 KMP_INIT_YIELD( spins );
2454
2455
2456 for (;;) {
2457 done = TRUE;
2458
2459        // TODO: GEH - this may be wrong because some sync would be necessary
2460 // in case threads are added to the pool during the traversal.
2461 // Need to verify that lock for thread pool is held when calling
2462 // this routine.
2463 for (thread = (kmp_info_t *)__kmp_thread_pool;
2464 thread != NULL;
2465 thread = thread->th.th_next_pool)
2466 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002467#if KMP_OS_WINDOWS
2468 DWORD exit_val;
2469#endif
2470 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2471 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2472 __kmp_gtid_from_thread( thread ) ) );
2473 continue;
2474 }
2475#if KMP_OS_WINDOWS
2476 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2477 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2478 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2479 __kmp_unref_task_team( thread->th.th_task_team, thread );
2480 }
2481 continue;
2482 }
2483#endif
2484
2485 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2486
2487 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2488 __kmp_gtid_from_thread( thread ) ) );
2489
2490 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002491 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002492 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002493 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002494 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2495 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002496 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002497 }
2498 }
2499 }
2500 if (done) {
2501 break;
2502 }
2503
2504 // If we are oversubscribed,
2505 // or have waited a bit (and library mode is throughput), yield.
2506 // Pause is in the following code.
2507 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2508 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2509 }
2510
2511
2512}
2513
2514
2515//------------------------------------------------------------------------------
2516// __kmp_task_team_setup: Create a task_team for the current team, but use
2517// an already created, unused one if it already exists.
2518// This may be called by any thread, but only for teams with # threads >1.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002519void
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002520__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002521{
2522 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2523
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002524 if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002525 // Allocate a new task team, which will be propagated to
2526 // all of the worker threads after the barrier. As they
2527 // spin in the barrier release phase, then will continue
2528 // to use the previous task team struct, until they receive
2529 // the signal to stop checking for tasks (they can't safely
2530 // reference the kmp_team_t struct, which could be reallocated
2531 // by the master thread).
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002532 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2533 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2534 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie5e8470a2013-09-27 10:38:44 +00002535 ((team != NULL) ? team->t.t_id : -1)) );
2536 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002537 //else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538 // All threads have reported in, and no tasks were spawned
2539 // for this release->gather region. Leave the old task
2540 // team struct in place for the upcoming region. No task
2541 // teams are formed for serialized teams.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002542 if (both) {
2543 int other_team = 1 - this_thr->th.th_task_state;
2544 if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2545 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2546 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2547 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2548 ((team != NULL) ? team->t.t_id : -1)) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002549 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002550 }
2551}
2552
2553
2554//------------------------------------------------------------------------------
2555// __kmp_task_team_sync: Propagation of task team data from team to threads
2556// which happens just after the release phase of a team barrier. This may be
2557// called by any thread, but only for teams with # threads > 1.
2558
2559void
2560__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2561{
2562 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2563
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002564 // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002565 if ( this_thr->th.th_task_team != NULL ) {
2566 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2567 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2568 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002569 } else { // We are re-using a task team that was never enabled.
2570 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002571 }
2572 }
2573
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002574 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002575 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002576 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2577 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002578 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2579 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2580 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2581}
2582
2583
2584//------------------------------------------------------------------------------
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002585// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002586// barrier gather phase.  Only called by the master thread if #threads in team > 1 or if proxy tasks were created.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002588__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002589 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002590 )
2591{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002592 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002593
2594 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2595 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2596
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002597 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2599 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002600 // All worker threads might have dropped through to the release phase, but could still
2601 // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2602 // only the master thread checks for the termination condition.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002603 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2604 flag.wait(this_thr, TRUE
2605 USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002606
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002607 // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2608 // They will deallocate it when the reference count reaches zero.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002609 // The master thread is not included in the ref count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002610 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2611 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002612#if OMP_41_ENABLED
2613 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2614 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2615#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002616 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002617#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2619 KMP_MB();
2620
2621 TCW_PTR(this_thr->th.th_task_team, NULL);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002622 team->t.t_task_team[this_thr->th.th_task_state] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623 }
2624}
2625
2626
2627//------------------------------------------------------------------------------
2628// __kmp_tasking_barrier:
2629// Internal function to execute all tasks prior to a regular barrier or a
2630// join barrier. It is a full barrier itself, which unfortunately turns
2631// regular barriers into double barriers and join barriers into 1 1/2
2632// barriers.
2633// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2634
2635void
2636__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2637{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002638 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002639 int flag = FALSE;
2640 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2641
2642#if USE_ITT_BUILD
2643 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2644#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002645 kmp_flag_32 spin_flag(spin, 0U);
2646 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2647 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002648#if USE_ITT_BUILD
2649 // TODO: What about itt_sync_obj??
2650 KMP_FSYNC_SPIN_PREPARE( spin );
2651#endif /* USE_ITT_BUILD */
2652
2653 if( TCR_4(__kmp_global.g.g_done) ) {
2654 if( __kmp_global.g.g_abort )
2655 __kmp_abort_thread( );
2656 break;
2657 }
2658 KMP_YIELD( TRUE ); // GH: We always yield here
2659 }
2660#if USE_ITT_BUILD
2661 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2662#endif /* USE_ITT_BUILD */
2663}
2664
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002665
2666#if OMP_41_ENABLED
2667
2668/* __kmp_give_task puts a task into a given thread's queue if:
2669 - the queue for that thread was created
2670 - there's space in that queue
2671
2672 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2673 */
2674static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2675{
2676 kmp_task_team_t * task_team = thread->th.th_task_team;
2677 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2678 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2679 bool result = false;
2680
2681 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2682
2683 // assert tasking is enabled? what if not?
2684 KMP_DEBUG_ASSERT( task_team != NULL );
2685
2686 if (thread_data -> td.td_deque == NULL ) {
2687 // There's no queue in this thread, go find another one
2688 // We're guaranteed that at least one thread has a queue
2689 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2690 return result;
2691 }
2692
2693 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2694 {
2695 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2696 return result;
2697 }
2698
2699 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2700
2701 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2702 {
2703 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2704 goto release_and_exit;
2705 }
2706
2707 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2708 // Wrap index.
2709 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2710 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2711
2712 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002713 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002714
2715release_and_exit:
2716 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2717
2718 return result;
2719}
2720
2721
2722/* The finish of a proxy task is divided in two pieces:
2723 - the top half is the part that can be done from a thread outside the team
2724 - the bottom half must be run from a thread within the team
2725
2726 In order to run the bottom half, the task gets queued back into one of the threads of the team.
2727 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2728 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
2729 - things that can be run before queuing the bottom half
2730 - things that must be run after queuing the bottom half
2731
2732 This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this
2733 we use the td_incomplete_child_tasks counter of the proxy task itself to synchronize the top and bottom halves.
2734*/
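// A rough sketch (not part of the build) of the ordering described above, using the helpers
// defined below.  The extra reference taken on the proxy task's own td_incomplete_child_tasks
// counter is what keeps the bottom half from freeing the task before the second top half runs:
//
//   __kmp_first_top_half_finish_proxy(taskdata);   // mark complete, take the extra "child" reference
//   __kmp_give_task(thread, tid, ptask);           // queue the bottom half into a team thread's deque
//   __kmp_second_top_half_finish_proxy(taskdata);  // decrement the parent's counter, drop the extra reference
//   // ...meanwhile, the team thread that dequeues ptask eventually runs:
//   __kmp_bottom_half_finish_proxy(gtid, ptask);   // spins until the extra reference is gone, then releases deps and frees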
2735
2736static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2737{
2738 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2739 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2740 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2741 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2742
2743 taskdata -> td_flags.complete = 1; // mark the task as completed
2744
2745 if ( taskdata->td_taskgroup )
2746 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2747
2748 // Create an imaginary child for this task so that the bottom half cannot release the task before we have completed the second top half
2749 TCR_4(taskdata->td_incomplete_child_tasks++);
2750}
2751
2752static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2753{
2754 kmp_int32 children = 0;
2755
2756 // Predecrement simulated by "- 1" calculation
2757 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2758 KMP_DEBUG_ASSERT( children >= 0 );
2759
2760 // Remove the imaginary child
2761 TCR_4(taskdata->td_incomplete_child_tasks--);
2762}
2763
2764static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2765{
2766 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2767 kmp_info_t * thread = __kmp_threads[ gtid ];
2768
2769 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2770 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2771
2772 // We need to wait to make sure the top half is finished
2773 // Spinning here should be ok as this should happen quickly
2774 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2775
2776 __kmp_release_deps(gtid,taskdata);
2777 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2778}
2779
2780/*!
2781@ingroup TASKING
2782@param gtid Global Thread ID of encountering thread
2783@param ptask Task which execution is completed
2784
2785Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
2786*/
2787void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2788{
2789 KMP_DEBUG_ASSERT( ptask != NULL );
2790 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2791 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2792
2793 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2794
2795 __kmp_first_top_half_finish_proxy(taskdata);
2796 __kmp_second_top_half_finish_proxy(taskdata);
2797 __kmp_bottom_half_finish_proxy(gtid,ptask);
2798
2799 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2800}
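// A minimal usage sketch, illustrative only (the hook name and the way the gtid reaches it
// are assumptions, not part of this file): a thread that belongs to the team and learns that
// the asynchronous work bound to a proxy task has finished can report the completion inline.
//
//   void my_completion_hook(kmp_int32 gtid, kmp_task_t *ptask)  // hypothetical hook run by a team thread
//   {
//       __kmpc_proxy_task_completed(gtid, ptask);  // runs both top halves and the bottom half directly
//   }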
2801
2802/*!
2803@ingroup TASKING
2804@param ptask Task which execution is completed
2805
2806Execute the completion of a proxy task from a thread that may not belong to the team.
2807*/
2808void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2809{
2810 KMP_DEBUG_ASSERT( ptask != NULL );
2811 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2812
2813 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2814
2815 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2816
2817 __kmp_first_top_half_finish_proxy(taskdata);
2818
2819 // Enqueue the task so that a thread within the corresponding team completes the bottom half
2820 kmp_team_t * team = taskdata->td_team;
2821 kmp_int32 nthreads = team->t.t_nproc;
2822 kmp_info_t *thread;
2823 kmp_int32 k = 0;
2824
2825 do {
Jonathan Peyton1406f012015-05-22 22:35:51 +00002826 // This should be similar to k = __kmp_get_random( thread ) % nthreads, but we cannot use __kmp_get_random here.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002827 // For now we just probe threads linearly until one of them accepts the task.
2828 k = (k+1) % nthreads;
2829 thread = team->t.t_threads[k];
2830 } while ( !__kmp_give_task( thread, k, ptask ) );
2831
2832 __kmp_second_top_half_finish_proxy(taskdata);
2833
2834 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2835}
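// A minimal usage sketch, illustrative only (the callback and the way the proxy task pointer
// reaches it are assumptions): a completion notification arriving on a thread outside the
// team, e.g. a device driver's helper thread, only needs the task pointer; the bottom half
// is handed back to the team internally, as shown above.
//
//   void my_offload_done_callback(void *data)      // hypothetical callback on a non-team thread
//   {
//       kmp_task_t *ptask = (kmp_task_t *)data;    // proxy task captured when the work was launched
//       __kmpc_proxy_task_completed_ooo(ptask);    // safe even though the caller is not in the team
//   }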
2836
2837#endif