Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * kmp_tasking.c -- OpenMP 3.0 tasking support.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000019#include "kmp_wait_release.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000020
Andrey Churbanove5f44922015-04-29 16:22:07 +000021#if OMPT_SUPPORT
22#include "ompt-specific.h"
23#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000024
Jim Cownie5e8470a2013-09-27 10:38:44 +000025
26/* ------------------------------------------------------------------------ */
27/* ------------------------------------------------------------------------ */
28
29
30/* forward declaration */
31static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
32static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
33static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
34
Andrey Churbanov535b6fa2015-05-07 17:41:51 +000035#ifdef OMP_41_ENABLED
36static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
37#endif
38
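// __kmp_null_resume_wrapper: resume a suspended thread through the abstract flag
// interface; the flag's dynamic type (32-bit, 64-bit, or oncore) is queried from the
// object itself and the matching __kmp_resume_* routine is called with a NULL flag.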
Jim Cownie4cc4bb42014-10-07 16:25:50 +000039static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
40 switch (((kmp_flag_64 *)flag)->get_type()) {
41 case flag32: __kmp_resume_32(gtid, NULL); break;
42 case flag64: __kmp_resume_64(gtid, NULL); break;
43 case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
44 }
Jim Cownie5e8470a2013-09-27 10:38:44 +000045}
46
47#ifdef BUILD_TIED_TASK_STACK
48
49//---------------------------------------------------------------------------
50// __kmp_trace_task_stack: print the tied tasks from the task stack in order
51// from top to bottom
52//
53// gtid: global thread identifier for thread containing stack
54// thread_data: thread data for task team thread containing stack
55// threshold: value above which the trace statement triggers
56// location: string identifying call site of this function (for trace)
57
58static void
59__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
60{
61 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
62 kmp_taskdata_t **stack_top = task_stack -> ts_top;
63 kmp_int32 entries = task_stack -> ts_entries;
64 kmp_taskdata_t *tied_task;
65
66 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
67 "first_block = %p, stack_top = %p \n",
68 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
69
70 KMP_DEBUG_ASSERT( stack_top != NULL );
71 KMP_DEBUG_ASSERT( entries > 0 );
72
73 while ( entries != 0 )
74 {
75 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
76 // fix up ts_top if we need to pop from previous block
77 if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
78 {
79 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
80
81 stack_block = stack_block -> sb_prev;
82 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
83 }
84
85 // finish bookkeeping
86 stack_top--;
87 entries--;
88
89 tied_task = * stack_top;
90
91 KMP_DEBUG_ASSERT( tied_task != NULL );
92 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
93
94 KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
95 "stack_top=%p, tied_task=%p\n",
96 location, gtid, entries, stack_top, tied_task ) );
97 }
98 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
99
100 KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
101 location, gtid ) );
102}
103
104//---------------------------------------------------------------------------
105// __kmp_init_task_stack: initialize the task stack for the first time
106// after a thread_data structure is created.
107// It should not be necessary to do this again (assuming the stack works).
108//
109// gtid: global thread identifier of calling thread
110// thread_data: thread data for task team thread containing stack
111
112static void
113__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
114{
115 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
116 kmp_stack_block_t *first_block;
117
118 // set up the first block of the stack
119 first_block = & task_stack -> ts_first_block;
120 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
121 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
122
123 // initialize the stack to be empty
124 task_stack -> ts_entries = TASK_STACK_EMPTY;
125 first_block -> sb_next = NULL;
126 first_block -> sb_prev = NULL;
127}
128
129
130//---------------------------------------------------------------------------
131// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
132//
133// gtid: global thread identifier for calling thread
134// thread_data: thread info for thread containing stack
135
136static void
137__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
138{
139 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
140 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
141
142 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
143 // free from the second block of the stack
144 while ( stack_block != NULL ) {
145 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
146
147 stack_block -> sb_next = NULL;
148 stack_block -> sb_prev = NULL;
149 if (stack_block != & task_stack -> ts_first_block) {
150 __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first ('thread' is not in scope here)
151 }
152 stack_block = next_block;
153 }
154 // initialize the stack to be empty
155 task_stack -> ts_entries = 0;
156 task_stack -> ts_top = NULL;
157}
158
159
160//---------------------------------------------------------------------------
161// __kmp_push_task_stack: Push the tied task onto the task stack.
162// Grow the stack if necessary by allocating another block.
163//
164// gtid: global thread identifier for calling thread
165// thread: thread info for thread containing stack
166// tied_task: the task to push on the stack
167
168static void
169__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
170{
171 // GEH - need to consider what to do if tt_threads_data not allocated yet
172 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
173 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
174 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
175
176 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
177 return; // Don't push anything on stack if team or team tasks are serialized
178 }
179
180 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
181 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
182
183 KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
184 gtid, thread, tied_task ) );
185 // Store entry
186 * (task_stack -> ts_top) = tied_task;
187
188 // Do bookkeeping for next push
189 task_stack -> ts_top++;
190 task_stack -> ts_entries++;
191
192 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
193 {
194 // Find beginning of this task block
195 kmp_stack_block_t *stack_block =
196 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
197
198 // Check if we already have a block
199 if ( stack_block -> sb_next != NULL )
200 { // reset ts_top to beginning of next block
201 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
202 }
203 else
204 { // Alloc new block and link it up
205 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
206 __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
207
208 task_stack -> ts_top = & new_block -> sb_block[0];
209 stack_block -> sb_next = new_block;
210 new_block -> sb_prev = stack_block;
211 new_block -> sb_next = NULL;
212
213 KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
214 gtid, tied_task, new_block ) );
215 }
216 }
217 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
218}
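// Note on the layout used above: the tied-task stack grows in fixed-size blocks of
// TASK_STACK_BLOCK_SIZE entries linked through sb_next/sb_prev; ts_top always points at
// the next free slot, and a new block is allocated only when a push crosses a block
// boundary.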
219
220//---------------------------------------------------------------------------
221// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
222// the task, just check to make sure it matches the ending task passed in.
223//
224// gtid: global thread identifier for the calling thread
225// thread: thread info structure containing stack
226// tied_task: the task popped off the stack
227// ending_task: the task that is ending (should match popped task)
228
229static void
230__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
231{
232 // GEH - need to consider what to do if tt_threads_data not allocated yet
233 kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
234 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
235 kmp_taskdata_t *tied_task;
236
237 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
238 return; // Don't pop anything from stack if team or team tasks are serialized
239 }
240
241 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
242 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
243
244 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
245
246 // fix up ts_top if we need to pop from previous block
247 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
248 {
249 kmp_stack_block_t *stack_block =
250 (kmp_stack_block_t *) (task_stack -> ts_top) ;
251
252 stack_block = stack_block -> sb_prev;
253 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
254 }
255
256 // finish bookkeeping
257 task_stack -> ts_top--;
258 task_stack -> ts_entries--;
259
260 tied_task = * (task_stack -> ts_top );
261
262 KMP_DEBUG_ASSERT( tied_task != NULL );
263 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
264 KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
265
266 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
267 return;
268}
269#endif /* BUILD_TIED_TASK_STACK */
270
271//---------------------------------------------------
272// __kmp_push_task: Add a task to the thread's deque
273
274static kmp_int32
275__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
276{
277 kmp_info_t * thread = __kmp_threads[ gtid ];
278 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
279 kmp_task_team_t * task_team = thread->th.th_task_team;
280 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
281 kmp_thread_data_t * thread_data;
282
283 KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
284
285 // The first check avoids building task_team thread data if serialized
286 if ( taskdata->td_flags.task_serial ) {
287 KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
288 gtid, taskdata ) );
289 return TASK_NOT_PUSHED;
290 }
291
292 // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
293 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000294 if ( ! KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000295 __kmp_enable_tasking( task_team, thread );
296 }
297 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
298 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
299
300 // Find tasking deque specific to encountering thread
301 thread_data = & task_team -> tt.tt_threads_data[ tid ];
302
303 // No lock needed since only owner can allocate
304 if (thread_data -> td.td_deque == NULL ) {
305 __kmp_alloc_task_deque( thread, thread_data );
306 }
307
308 // Check if deque is full
309 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
310 {
311 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
312 gtid, taskdata ) );
313 return TASK_NOT_PUSHED;
314 }
315
316 // Lock the deque for the task push operation
317 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
318
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000319#if OMP_41_ENABLED
320 // Need to recheck as we can get a proxy task from a thread outside of OpenMP
321 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
322 {
323 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
324 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
325 gtid, taskdata ) );
326 return TASK_NOT_PUSHED;
327 }
328#else
Jim Cownie5e8470a2013-09-27 10:38:44 +0000329 // Must have room since no thread can add tasks but calling thread
330 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000331#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000332
333 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
334 // Wrap index.
335 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
336 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
337
338 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
339
340 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
341 "task=%p ntasks=%d head=%u tail=%u\n",
342 gtid, taskdata, thread_data->td.td_deque_ntasks,
343 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
344
345 return TASK_SUCCESSFULLY_PUSHED;
346}
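// Illustrative wrap arithmetic for the push above (the concrete constants are assumptions
// for the example): if TASK_DEQUE_SIZE were 256, TASK_DEQUE_MASK would be 0xFF, so a tail
// of 255 wraps back to slot 0 on the next push: (255 + 1) & 0xFF == 0.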
347
348
349//-----------------------------------------------------------------------------------------
350// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
351// this_thr: thread structure to set current_task in.
352
353void
354__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
355{
356 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
357 "curtask_parent=%p\n",
358 0, this_thr, this_thr -> th.th_current_task,
359 this_thr -> th.th_current_task -> td_parent ) );
360
361 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
362
363 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
364 "curtask_parent=%p\n",
365 0, this_thr, this_thr -> th.th_current_task,
366 this_thr -> th.th_current_task -> td_parent ) );
367}
368
369
370//---------------------------------------------------------------------------------------
371// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
372// this_thr: thread structure to set up
373// team: team for implicit task data
374// tid: thread within team to set up
375
376void
377__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
378{
379 // The current task of the thread becomes the parent of the just-created implicit tasks of the new team
380 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
381 "parent_task=%p\n",
382 tid, this_thr, this_thr->th.th_current_task,
383 team->t.t_implicit_task_taskdata[tid].td_parent ) );
384
385 KMP_DEBUG_ASSERT (this_thr != NULL);
386
387 if( tid == 0 ) {
388 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
389 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
390 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
391 }
392 } else {
393 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
394 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
395 }
396
397 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
398 "parent_task=%p\n",
399 tid, this_thr, this_thr->th.th_current_task,
400 team->t.t_implicit_task_taskdata[tid].td_parent ) );
401}
402
403
404//----------------------------------------------------------------------
405// __kmp_task_start: bookkeeping for a task starting execution
406// GTID: global thread id of calling thread
407// task: task starting execution
408// current_task: task suspending
409
410static void
411__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
412{
413 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
414 kmp_info_t * thread = __kmp_threads[ gtid ];
415
416 KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
417 gtid, taskdata, current_task) );
418
419 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
420
421 // mark currently executing task as suspended
422 // TODO: GEH - make sure root team implicit task is initialized properly.
423 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
424 current_task -> td_flags.executing = 0;
425
426 // Add task to stack if tied
427#ifdef BUILD_TIED_TASK_STACK
428 if ( taskdata -> td_flags.tiedness == TASK_TIED )
429 {
430 __kmp_push_task_stack( gtid, thread, taskdata );
431 }
432#endif /* BUILD_TIED_TASK_STACK */
433
434 // mark starting task as executing and as current task
435 thread -> th.th_current_task = taskdata;
436
437 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
438 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
439 taskdata -> td_flags.started = 1;
440 taskdata -> td_flags.executing = 1;
441 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
442 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
443
444 // GEH TODO: shouldn't we pass some sort of location identifier here?
445 // APT: yes, we will pass location here.
446 // need to store current thread state (in a thread or taskdata structure)
447 // before setting work_state, otherwise wrong state is set after end of task
448
449 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
450 gtid, taskdata ) );
451
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000452#if OMPT_SUPPORT
453 if ((ompt_status == ompt_status_track_callback) &&
454 ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
455 kmp_taskdata_t *parent = taskdata->td_parent;
456 ompt_callbacks.ompt_callback(ompt_event_task_begin)(
457 parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
458 parent ? &(parent->ompt_task_info.frame) : NULL,
459 taskdata->ompt_task_info.task_id,
460 taskdata->ompt_task_info.function);
461 }
462#endif
463
Jim Cownie5e8470a2013-09-27 10:38:44 +0000464 return;
465}
466
467
468//----------------------------------------------------------------------
469// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
470// loc_ref: source location information; points to beginning of task block.
471// gtid: global thread number.
472// task: task thunk for the started task.
473
474void
475__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
476{
477 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
478 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
479
480 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
481 gtid, loc_ref, taskdata, current_task ) );
482
483 taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
484 __kmp_task_start( gtid, task, current_task );
485
486 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
487 gtid, loc_ref, taskdata ) );
488
489 return;
490}
491
492#ifdef TASK_UNUSED
493//----------------------------------------------------------------------
494// __kmpc_omp_task_begin: report that a given task has started execution
495// NEVER GENERATED BY COMPILER, DEPRECATED!!!
496
497void
498__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
499{
500 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
501
502 KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
503 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
504
505 __kmp_task_start( gtid, task, current_task );
506
507 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
508 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
509
510 return;
511}
512#endif // TASK_UNUSED
513
514
515//-------------------------------------------------------------------------------------
516// __kmp_free_task: free the current task space and the space for shareds
517// gtid: Global thread ID of calling thread
518// taskdata: task to free
519// thread: thread data structure of caller
520
521static void
522__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
523{
524 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
525 gtid, taskdata) );
526
527 // Check to make sure all flags and counters have the correct values
528 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
529 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
530 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
531 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
532 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
533 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
534
535 taskdata->td_flags.freed = 1;
536 // deallocate the taskdata and shared variable blocks associated with this task
537 #if USE_FAST_MEMORY
538 __kmp_fast_free( thread, taskdata );
539 #else /* ! USE_FAST_MEMORY */
540 __kmp_thread_free( thread, taskdata );
541 #endif
542
543 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
544 gtid, taskdata) );
545}
546
547//-------------------------------------------------------------------------------------
548// __kmp_free_task_and_ancestors: free the current task and ancestors without children
549//
550// gtid: Global thread ID of calling thread
551// taskdata: task to free
552// thread: thread data structure of caller
553
554static void
555__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
556{
557 kmp_int32 children = 0;
558 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
559
560 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
561
562 if ( !team_or_tasking_serialized ) {
563 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
564 KMP_DEBUG_ASSERT( children >= 0 );
565 }
566
567 // Now, go up the ancestor tree to see if any ancestors can now be freed.
568 while ( children == 0 )
569 {
570 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
571
572 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
573 "and freeing itself\n", gtid, taskdata) );
574
575 // --- Deallocate my ancestor task ---
576 __kmp_free_task( gtid, taskdata, thread );
577
578 taskdata = parent_taskdata;
579
580 // Stop checking ancestors at implicit task or if tasking serialized
581 // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
582 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
583 return;
584
585 if ( !team_or_tasking_serialized ) {
586 // Predecrement simulated by "- 1" calculation
587 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
588 KMP_DEBUG_ASSERT( children >= 0 );
589 }
590 }
591
592 KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
593 "not freeing it yet\n", gtid, taskdata, children) );
594}
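// The loop above effectively treats td_allocated_child_tasks as a reference count: each
// explicit task starts at one (for itself) and is incremented per allocated explicit child,
// so a task's storage is reclaimed only after it has completed and all of its children
// have been freed.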
595
596//---------------------------------------------------------------------
597// __kmp_task_finish: bookkeeping to do when a task finishes execution
598// gtid: global thread ID for calling thread
599// task: task to be finished
600// resumed_task: task to be resumed. (may be NULL if task is serialized)
601
602static void
603__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
604{
605 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
606 kmp_info_t * thread = __kmp_threads[ gtid ];
607 kmp_int32 children = 0;
608
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000609#if OMPT_SUPPORT
610 if ((ompt_status == ompt_status_track_callback) &&
611 ompt_callbacks.ompt_callback(ompt_event_task_end)) {
612 kmp_taskdata_t *parent = taskdata->td_parent;
613 ompt_callbacks.ompt_callback(ompt_event_task_end)(
614 taskdata->ompt_task_info.task_id);
615 }
616#endif
617
Jim Cownie5e8470a2013-09-27 10:38:44 +0000618 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
619 gtid, taskdata, resumed_task) );
620
621 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
622
623 // Pop task from stack if tied
624#ifdef BUILD_TIED_TASK_STACK
625 if ( taskdata -> td_flags.tiedness == TASK_TIED )
626 {
627 __kmp_pop_task_stack( gtid, thread, taskdata );
628 }
629#endif /* BUILD_TIED_TASK_STACK */
630
Jim Cownie5e8470a2013-09-27 10:38:44 +0000631 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000632 taskdata -> td_flags.complete = 1; // mark the task as completed
633 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
634 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
635
636 // Only need to keep track of count if team parallel and tasking not serialized
637 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
638 // Predecrement simulated by "- 1" calculation
639 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
640 KMP_DEBUG_ASSERT( children >= 0 );
641#if OMP_40_ENABLED
642 if ( taskdata->td_taskgroup )
643 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
Jim Cownie181b4bb2013-12-23 17:28:57 +0000644 __kmp_release_deps(gtid,taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000645#endif
646 }
647
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000648 // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
649 // Otherwise, if a task is executed immediately from the release_deps code
650 // the flag will be reset to 1 again by this same function
651 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
652 taskdata -> td_flags.executing = 0; // suspend the finishing task
653
Jim Cownie5e8470a2013-09-27 10:38:44 +0000654 KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
655 gtid, taskdata, children) );
656
Jim Cownie181b4bb2013-12-23 17:28:57 +0000657#if OMP_40_ENABLED
658 /* If the task's destructor thunk flag has been set, we need to invoke the
659 destructor thunk that has been generated by the compiler.
660 The code is placed here since, at this point, other tasks might have been released,
661 hence overlapping the destructor invocations with some other work in the
662 released tasks. The OpenMP spec is not specific on when the destructors are
663 invoked, so we should be free to choose.
664 */
665 if (taskdata->td_flags.destructors_thunk) {
666 kmp_routine_entry_t destr_thunk = task->destructors;
667 KMP_ASSERT(destr_thunk);
668 destr_thunk(gtid, task);
669 }
670#endif // OMP_40_ENABLED
671
Jim Cownie5e8470a2013-09-27 10:38:44 +0000672 // bookkeeping for resuming task:
673 // GEH - note tasking_ser => task_serial
674 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
675 taskdata->td_flags.task_serial);
676 if ( taskdata->td_flags.task_serial )
677 {
678 if (resumed_task == NULL) {
679 resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
680 }
681 else {
682 // verify resumed task passed in points to parent
683 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
684 }
685 }
686 else {
687 KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
688 }
689
690 // Free this task and then ancestor tasks if they have no children.
691 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
692
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000693 // FIXME johnmc: I think this statement should be before the last one so if an
694 // asynchronous inquiry peers into the runtime system it doesn't see the freed
695 // task as the current task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000696 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
697
698 // TODO: GEH - make sure root team implicit task is initialized properly.
699 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
700 resumed_task->td_flags.executing = 1; // resume previous task
701
702 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
703 gtid, taskdata, resumed_task) );
704
705 return;
706}
707
708//---------------------------------------------------------------------
709// __kmpc_omp_task_complete_if0: report that a task has completed execution
710// loc_ref: source location information; points to end of task block.
711// gtid: global thread number.
712// task: task thunk for the completed task.
713
714void
715__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
716{
717 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
718 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
719
720 __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
721
722 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
723 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
724
725 return;
726}
727
728#ifdef TASK_UNUSED
729//---------------------------------------------------------------------
730// __kmpc_omp_task_complete: report that a task has completed execution
731// NEVER GENERATED BY COMPILER, DEPRECATED!!!
732
733void
734__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
735{
736 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
737 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
738
739 __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
740
741 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
742 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
743 return;
744}
745#endif // TASK_UNUSED
746
747
Andrey Churbanove5f44922015-04-29 16:22:07 +0000748#if OMPT_SUPPORT
749//----------------------------------------------------------------------------------------------------
750// __kmp_task_init_ompt:
751// Initialize OMPT fields maintained by a task. Since the serial task is initialized before
752// ompt_initialize is called, we do not yet know at that point whether OMPT will be used.
753// This function provides the support needed to initialize OMPT for the serial task
754// after the fact.
755
756void
757__kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
758{
759 task->ompt_task_info.task_id = __ompt_task_id_new(tid);
760 task->ompt_task_info.function = NULL;
Jonathan Peytonda7c8ab2015-06-29 17:33:03 +0000761 task->ompt_task_info.frame.exit_runtime_frame = NULL;
762 task->ompt_task_info.frame.reenter_runtime_frame = NULL;
Andrey Churbanove5f44922015-04-29 16:22:07 +0000763}
764#endif
765
766
Jim Cownie5e8470a2013-09-27 10:38:44 +0000767//----------------------------------------------------------------------------------------------------
768// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
769//
770// loc_ref: reference to source location of parallel region
771// this_thr: thread data structure corresponding to implicit task
772// team: team for this_thr
773// tid: thread id of given thread within team
774// set_curr_task: TRUE if need to push current task to thread
775// NOTE: Routine does not set up the implicit task ICVs. This is assumed to have already been done elsewhere.
776// TODO: Get better loc_ref. Value passed in may be NULL
777
778void
779__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
780{
781 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
782
783 KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
784 tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
785
786 task->td_task_id = KMP_GEN_TASK_ID();
787 task->td_team = team;
788// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
789 task->td_ident = loc_ref;
790 task->td_taskwait_ident = NULL;
791 task->td_taskwait_counter = 0;
792 task->td_taskwait_thread = 0;
793
794 task->td_flags.tiedness = TASK_TIED;
795 task->td_flags.tasktype = TASK_IMPLICIT;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000796#if OMP_41_ENABLED
797 task->td_flags.proxy = TASK_FULL;
798#endif
799
Jim Cownie5e8470a2013-09-27 10:38:44 +0000800 // All implicit tasks are executed immediately, not deferred
801 task->td_flags.task_serial = 1;
802 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
803 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
804
805 task->td_flags.started = 1;
806 task->td_flags.executing = 1;
807 task->td_flags.complete = 0;
808 task->td_flags.freed = 0;
809
Jim Cownie181b4bb2013-12-23 17:28:57 +0000810#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +0000811 task->td_dephash = NULL;
812 task->td_depnode = NULL;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000813#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000814
815 if (set_curr_task) { // only do this initialization the first time a thread is created
816 task->td_incomplete_child_tasks = 0;
817 task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
818#if OMP_40_ENABLED
819 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
820#endif
821 __kmp_push_current_task_to_thread( this_thr, team, tid );
822 } else {
823 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
824 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
825 }
826
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000827#if OMPT_SUPPORT
828 __kmp_task_init_ompt(task, tid);
829#endif
830
Jim Cownie5e8470a2013-09-27 10:38:44 +0000831 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
832 tid, team, task ) );
833}
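// The implicit task initialized above acts as the root of this thread's explicit-task tree
// for the region: explicit tasks created while it is current reach it through their
// td_parent chain, which is why its child counters are initialized (or asserted zero) here.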
834
835// Round a size up to a multiple of val, where val is a power of two
836// Used to insert padding between structures co-allocated using a single malloc() call
837static size_t
838__kmp_round_up_to_val( size_t size, size_t val ) {
839 if ( size & ( val - 1 ) ) {
840 size &= ~ ( val - 1 );
841 if ( size <= KMP_SIZE_T_MAX - val ) {
842 size += val; // Round up if there is no overflow.
843 }; // if
844 }; // if
845 return size;
846} // __kmp_round_up_to_val
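// Worked example (illustrative only): __kmp_round_up_to_val( 52, 8 ) clears the low bits
// (52 & ~7 == 48) and then adds val, returning 56; a size that is already a multiple of
// val, such as 56, is returned unchanged.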
847
848
849//---------------------------------------------------------------------------------
850// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
851//
852// loc_ref: source location information
853// gtid: global thread number.
854// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
855// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
856// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
857// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
858// task_entry: Pointer to task code entry point generated by compiler.
859// returns: a pointer to the allocated kmp_task_t structure (task).
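// The taskdata, the kmp_task_t (plus compiler-private data) and the shareds array are
// carved out of one allocation: [ kmp_taskdata_t | kmp_task_t + privates | padding | shareds ],
// with shareds_offset rounded up so the pointers in the shareds block remain aligned.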
860
861kmp_task_t *
862__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
863 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
864 kmp_routine_entry_t task_entry )
865{
866 kmp_task_t *task;
867 kmp_taskdata_t *taskdata;
868 kmp_info_t *thread = __kmp_threads[ gtid ];
869 kmp_team_t *team = thread->th.th_team;
870 kmp_taskdata_t *parent_task = thread->th.th_current_task;
871 size_t shareds_offset;
872
873 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
874 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
875 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
876 sizeof_shareds, task_entry) );
877
878 if ( parent_task->td_flags.final ) {
879 if (flags->merged_if0) {
880 }
881 flags->final = 1;
882 }
883
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000884#if OMP_41_ENABLED
885 if ( flags->proxy == TASK_PROXY ) {
886 flags->tiedness = TASK_UNTIED;
887 flags->merged_if0 = 1;
888
889 /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
890 if ( (thread->th.th_task_team) == NULL ) {
891 /* This should only happen if the team is serialized
892 setup a task team and propagate it to the thread
893 */
894 KMP_DEBUG_ASSERT(team->t.t_serialized);
895 KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
896 __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads
897 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
898 }
899 kmp_task_team_t * task_team = thread->th.th_task_team;
900
901 /* tasking must be enabled now as the task might not be pushed */
902 if ( !KMP_TASKING_ENABLED( task_team ) ) {
903 KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
904 __kmp_enable_tasking( task_team, thread );
905 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
906 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
907 // No lock needed since only owner can allocate
908 if (thread_data -> td.td_deque == NULL ) {
909 __kmp_alloc_task_deque( thread, thread_data );
910 }
911 }
912
913 if ( task_team->tt.tt_found_proxy_tasks == FALSE )
914 TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
915 }
916#endif
917
Jim Cownie5e8470a2013-09-27 10:38:44 +0000918 // Calculate shared structure offset including padding after kmp_task_t struct
919 // to align pointers in shared struct
920 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
921 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
922
923 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
924 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
925 gtid, shareds_offset) );
926 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
927 gtid, sizeof_shareds) );
928
929 // Avoid double allocation here by combining shareds with taskdata
930 #if USE_FAST_MEMORY
931 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
932 #else /* ! USE_FAST_MEMORY */
933 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
934 #endif /* USE_FAST_MEMORY */
935
936 task = KMP_TASKDATA_TO_TASK(taskdata);
937
938 // Make sure task & taskdata are aligned appropriately
Andrey Churbanovd1c55042015-01-19 18:29:35 +0000939#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
Jim Cownie5e8470a2013-09-27 10:38:44 +0000940 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
941 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
942#else
943 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
944 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
945#endif
946 if (sizeof_shareds > 0) {
947 // Avoid double allocation here by combining shareds with taskdata
948 task->shareds = & ((char *) taskdata)[ shareds_offset ];
949 // Make sure shareds struct is aligned to pointer size
950 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
951 } else {
952 task->shareds = NULL;
953 }
954 task->routine = task_entry;
955 task->part_id = 0; // AC: Always start with 0 part id
956
957 taskdata->td_task_id = KMP_GEN_TASK_ID();
958 taskdata->td_team = team;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000959 taskdata->td_alloc_thread = thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000960 taskdata->td_parent = parent_task;
961 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
962 taskdata->td_ident = loc_ref;
963 taskdata->td_taskwait_ident = NULL;
964 taskdata->td_taskwait_counter = 0;
965 taskdata->td_taskwait_thread = 0;
966 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000967#if OMP_41_ENABLED
968 // avoid copying icvs for proxy tasks
969 if ( flags->proxy == TASK_FULL )
970#endif
971 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000972
973 taskdata->td_flags.tiedness = flags->tiedness;
974 taskdata->td_flags.final = flags->final;
975 taskdata->td_flags.merged_if0 = flags->merged_if0;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000976#if OMP_40_ENABLED
977 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
978#endif // OMP_40_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000979#if OMP_41_ENABLED
980 taskdata->td_flags.proxy = flags->proxy;
981#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000982 taskdata->td_flags.tasktype = TASK_EXPLICIT;
983
984 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
985 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
986
987 // GEH - TODO: fix this to copy parent task's value of team_serial flag
988 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
989
990 // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
991 // tasks are not left until program termination to execute. Also, it helps locality to execute
992 // immediately.
Jonathan Peyton7881aa12015-05-21 21:16:38 +0000993 taskdata->td_flags.task_serial = ( parent_task->td_flags.final
Jim Cownie5e8470a2013-09-27 10:38:44 +0000994 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
995
996 taskdata->td_flags.started = 0;
997 taskdata->td_flags.executing = 0;
998 taskdata->td_flags.complete = 0;
999 taskdata->td_flags.freed = 0;
1000
1001 taskdata->td_flags.native = flags->native;
1002
1003 taskdata->td_incomplete_child_tasks = 0;
1004 taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
1005#if OMP_40_ENABLED
1006 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
1007 taskdata->td_dephash = NULL;
1008 taskdata->td_depnode = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001009#endif
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001010
1011 // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
1012#if OMP_41_ENABLED
1013 if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1014#else
1015 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1016#endif
1017 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001018 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
1019#if OMP_40_ENABLED
1020 if ( parent_task->td_taskgroup )
1021 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
1022#endif
1023 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
1024 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
1025 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
1026 }
1027 }
1028
1029 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1030 gtid, taskdata, taskdata->td_parent) );
1031
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001032#if OMPT_SUPPORT
1033 if (ompt_status & ompt_status_track) {
1034 taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1035 taskdata->ompt_task_info.function = (void*) task_entry;
Jonathan Peytonda7c8ab2015-06-29 17:33:03 +00001036 taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
1037 taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001038 }
1039#endif
1040
Jim Cownie5e8470a2013-09-27 10:38:44 +00001041 return task;
1042}
1043
1044
1045kmp_task_t *
1046__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1047 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1048 kmp_routine_entry_t task_entry )
1049{
1050 kmp_task_t *retval;
1051 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1052
1053 input_flags->native = FALSE;
1054 // __kmp_task_alloc() sets up all other runtime flags
1055
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001056#if OMP_41_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001057 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001058 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1059 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001060 input_flags->proxy ? "proxy" : "",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001061 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001062#else
1063 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1064 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1065 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1066 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1067#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001068
1069 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1070 sizeof_shareds, task_entry );
1071
1072 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1073
1074 return retval;
1075}
1076
1077//-----------------------------------------------------------
1078// __kmp_invoke_task: invoke the specified task
1079//
1080// gtid: global thread ID of caller
1081// task: the task to invoke
1082// current_task: the task to resume after task invocation
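// For a proxy task that has already completed, only the deferred bottom-half bookkeeping is
// run; if cancellation is enabled and the task's taskgroup or enclosing parallel region has
// been cancelled, the task body is discarded while the start/finish bookkeeping still runs.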
1083
1084static void
1085__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1086{
1087 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jim Cownie181b4bb2013-12-23 17:28:57 +00001088#if OMP_40_ENABLED
1089 int discard = 0 /* false */;
1090#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001091 KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1092 gtid, taskdata, current_task) );
1093
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001094#if OMP_41_ENABLED
1095 if ( taskdata->td_flags.proxy == TASK_PROXY &&
1096 taskdata->td_flags.complete == 1)
1097 {
1098 // This is a proxy task that was already completed but it needs to run
1099 // its bottom-half finish
1100 KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1101 gtid, taskdata) );
1102
1103 __kmp_bottom_half_finish_proxy(gtid,task);
1104
1105 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1106
1107 return;
1108 }
1109#endif
1110
1111#if OMP_41_ENABLED
1112 // Proxy tasks are not handled by the runtime
1113 if ( taskdata->td_flags.proxy != TASK_PROXY )
1114#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001115 __kmp_task_start( gtid, task, current_task );
1116
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001117#if OMPT_SUPPORT
1118 ompt_thread_info_t oldInfo;
1119 kmp_info_t * thread;
1120 if (ompt_status & ompt_status_track) {
1121 // Store the thread's state and restore it after the task
1122 thread = __kmp_threads[ gtid ];
1123 oldInfo = thread->th.ompt_thread_info;
1124 thread->th.ompt_thread_info.wait_id = 0;
1125 thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1126 taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1127 }
1128#endif
1129
Jim Cownie181b4bb2013-12-23 17:28:57 +00001130#if OMP_40_ENABLED
1131 // TODO: cancel tasks if the parallel region has also been cancelled
1132 // TODO: check if this sequence can be hoisted above __kmp_task_start
1133 // if cancellation has been enabled for this run ...
1134 if (__kmp_omp_cancellation) {
1135 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1136 kmp_team_t * this_team = this_thr->th.th_team;
1137 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1138 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1139 // this task belongs to a task group and we need to cancel it
1140 discard = 1 /* true */;
1141 }
1142 }
1143
Jim Cownie5e8470a2013-09-27 10:38:44 +00001144 //
1145 // Invoke the task routine and pass in relevant data.
1146 // Thunks generated by gcc take a different argument list.
1147 //
Jim Cownie181b4bb2013-12-23 17:28:57 +00001148 if (!discard) {
1149#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001150#ifdef KMP_GOMP_COMPAT
Jim Cownie181b4bb2013-12-23 17:28:57 +00001151 if (taskdata->td_flags.native) {
1152 ((void (*)(void *))(*(task->routine)))(task->shareds);
1153 }
1154 else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001155#endif /* KMP_GOMP_COMPAT */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001156 {
1157 (*(task->routine))(gtid, task);
1158 }
1159#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001160 }
Jim Cownie181b4bb2013-12-23 17:28:57 +00001161#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001162
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001163
1164#if OMPT_SUPPORT
1165 if (ompt_status & ompt_status_track) {
1166 thread->th.ompt_thread_info = oldInfo;
1167 taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1168 }
1169#endif
1170
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001171#if OMP_41_ENABLED
1172 // Proxy tasks are not handled by the runtime
1173 if ( taskdata->td_flags.proxy != TASK_PROXY )
1174#endif
1175 __kmp_task_finish( gtid, task, current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001176
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001177 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001178 gtid, taskdata, current_task) );
1179 return;
1180}
1181
1182//-----------------------------------------------------------------------
1183// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1184//
1185// loc_ref: location of original task pragma (ignored)
1186// gtid: Global Thread ID of encountering thread
1187// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1188// Returns:
1189// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1190// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1191
1192kmp_int32
1193__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1194{
1195 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1196
1197 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1198 gtid, loc_ref, new_taskdata ) );
1199
1200 /* Should we execute the new task or queue it? For now, let's just always try to
1201 queue it. If the queue fills up, then we'll execute it. */
1202
1203 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1204 { // Execute this task immediately
1205 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1206 new_taskdata->td_flags.task_serial = 1;
1207 __kmp_invoke_task( gtid, new_task, current_task );
1208 }
1209
1210 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1211 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1212 new_taskdata ) );
1213
1214 return TASK_CURRENT_NOT_QUEUED;
1215}
1216
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001217//---------------------------------------------------------------------
1218// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1219// gtid: Global Thread ID of encountering thread
1220// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1221// serialize_immediate: if TRUE and the task is executed immediately, its execution will be serialized
1222// returns:
1223//
1224// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1225// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1226kmp_int32
1227__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1228{
1229 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1230
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001231#if OMPT_SUPPORT
1232 if (ompt_status & ompt_status_track) {
1233 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1234 __builtin_frame_address(0);
1235 }
1236#endif
1237
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001238 /* Should we execute the new task or queue it? For now, let's just always try to
1239 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001240#if OMP_41_ENABLED
1241 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1242#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001243 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001244#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001245 { // Execute this task immediately
1246 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1247 if ( serialize_immediate )
1248 new_taskdata -> td_flags.task_serial = 1;
1249 __kmp_invoke_task( gtid, new_task, current_task );
1250 }
1251
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001252#if OMPT_SUPPORT
1253 if (ompt_status & ompt_status_track) {
1254 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1255 }
1256#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001257
1258 return TASK_CURRENT_NOT_QUEUED;
1259}
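// Usage note: __kmpc_omp_task below calls this helper with serialize_immediate == true, so
// a task that could not be deferred is flagged task_serial before it is invoked and the
// finish bookkeeping then treats it as having executed in place (resuming its parent).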
Jim Cownie5e8470a2013-09-27 10:38:44 +00001260
1261//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001262// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1263// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001264// loc_ref: location of original task pragma (ignored)
1265// gtid: Global Thread ID of encountering thread
1266// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1267// returns:
1268//
1269// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1270// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1271
1272kmp_int32
1273__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1274{
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001275 kmp_taskdata_t * new_taskdata;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001276 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001277
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001278 new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001279 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1280 gtid, loc_ref, new_taskdata ) );
1281
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001282 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001283
1284 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1285 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001286 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001287}
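// Hedged sketch (illustrative, not generated from this file): roughly how a compiler might
// lower "#pragma omp task" onto the two entry points above. The outlined routine name, the
// flags value and the absence of shared variables are assumptions made for the example.
//
//   kmp_int32 outlined_task_fn( kmp_int32 gtid, kmp_task_t *task );   // outlined task body
//
//   kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, 1 /* tied */, sizeof(kmp_task_t),
//                                          0 /* sizeof_shareds */,
//                                          (kmp_routine_entry_t) outlined_task_fn );
//   __kmpc_omp_task( &loc, gtid, t );                                 // queue or run it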
1288
Jim Cownie5e8470a2013-09-27 10:38:44 +00001289//-------------------------------------------------------------------------------------
1290// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1291
1292kmp_int32
1293__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1294{
1295 kmp_taskdata_t * taskdata;
1296 kmp_info_t * thread;
1297 int thread_finished = FALSE;
1298
1299 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1300 gtid, loc_ref) );
1301
1302 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1303 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1304
1305 thread = __kmp_threads[ gtid ];
1306 taskdata = thread -> th.th_current_task;
1307#if USE_ITT_BUILD
1308 // Note: These values are used by ITT events as well.
1309#endif /* USE_ITT_BUILD */
1310 taskdata->td_taskwait_counter += 1;
1311 taskdata->td_taskwait_ident = loc_ref;
1312 taskdata->td_taskwait_thread = gtid + 1;
1313
1314#if USE_ITT_BUILD
1315 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1316 if ( itt_sync_obj != NULL )
1317 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1318#endif /* USE_ITT_BUILD */
1319
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001320#if OMP_41_ENABLED
1321 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1322#else
1323 if ( ! taskdata->td_flags.team_serial )
1324#endif
1325 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001326 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001327 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001328 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001329 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1330 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001331 }
1332 }
1333#if USE_ITT_BUILD
1334 if ( itt_sync_obj != NULL )
1335 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1336#endif /* USE_ITT_BUILD */
1337
1338 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1339 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1340 }
1341
1342 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1343 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1344
1345 return TASK_CURRENT_NOT_QUEUED;
1346}
1347
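// Illustrative usage sketch (an assumption about compiler-generated code, not taken
// from this file): a "#pragma omp taskwait" is expected to lower to a single call to
// the entry point above, with gtid obtained from the runtime:
//
//   __kmpc_omp_taskwait( &loc, gtid );
//
// The wait itself is the kmp_flag_32 loop above: the thread keeps executing other
// tasks while td_incomplete_child_tasks of the current task is non-zero.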
1348
1349//-------------------------------------------------
1350// __kmpc_omp_taskyield: switch to a different task
1351
1352kmp_int32
1353__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1354{
1355 kmp_taskdata_t * taskdata;
1356 kmp_info_t * thread;
1357 int thread_finished = FALSE;
1358
1359 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1360 gtid, loc_ref, end_part) );
1361
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001362 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001363 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1364
1365 thread = __kmp_threads[ gtid ];
1366 taskdata = thread -> th.th_current_task;
1367 // Should we model this as a task wait or not?
1368#if USE_ITT_BUILD
1369 // Note: These values are used by ITT events as well.
1370#endif /* USE_ITT_BUILD */
1371 taskdata->td_taskwait_counter += 1;
1372 taskdata->td_taskwait_ident = loc_ref;
1373 taskdata->td_taskwait_thread = gtid + 1;
1374
1375#if USE_ITT_BUILD
1376 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1377 if ( itt_sync_obj != NULL )
1378 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1379#endif /* USE_ITT_BUILD */
1380 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001381 kmp_task_team_t * task_team = thread->th.th_task_team;
1382 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001383 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001384 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1385 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1386 }
1387 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001388 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001389#if USE_ITT_BUILD
1390 if ( itt_sync_obj != NULL )
1391 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1392#endif /* USE_ITT_BUILD */
1393
1394 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1395 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1396 }
1397
1398 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1399 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1400
1401 return TASK_CURRENT_NOT_QUEUED;
1402}
1403
1404
1405#if OMP_40_ENABLED
1406//-------------------------------------------------------------------------------------
1407// __kmpc_taskgroup: Start a new taskgroup
1408
1409void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001410__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001411{
1412 kmp_info_t * thread = __kmp_threads[ gtid ];
1413 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1414 kmp_taskgroup_t * tg_new =
1415 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1416 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1417 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001418 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001419 tg_new->parent = taskdata->td_taskgroup;
1420 taskdata->td_taskgroup = tg_new;
1421}
1422
1423
1424//-------------------------------------------------------------------------------------
1425// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1426// and its descendants are complete
1427
1428void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001429__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001430{
1431 kmp_info_t * thread = __kmp_threads[ gtid ];
1432 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1433 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1434 int thread_finished = FALSE;
1435
1436 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1437 KMP_DEBUG_ASSERT( taskgroup != NULL );
1438
1439 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1440#if USE_ITT_BUILD
1441 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1442 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1443 if ( itt_sync_obj != NULL )
1444 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1445#endif /* USE_ITT_BUILD */
1446
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001447#if OMP_41_ENABLED
1448 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1449#else
1450 if ( ! taskdata->td_flags.team_serial )
1451#endif
1452 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001453 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001454 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001455 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1456 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001457 }
1458 }
1459
1460#if USE_ITT_BUILD
1461 if ( itt_sync_obj != NULL )
1462 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1463#endif /* USE_ITT_BUILD */
1464 }
1465 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1466
1467 // Restore parent taskgroup for the current task
1468 taskdata->td_taskgroup = taskgroup->parent;
1469 __kmp_thread_free( thread, taskgroup );
1470
1471 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1472}
1473#endif
1474
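// Illustrative sketch (an assumption about compiler-generated code): a
// "#pragma omp taskgroup" region is expected to be bracketed by the two entry points
// above, for example:
//
//   __kmpc_taskgroup( &loc, gtid );
//   /* ... tasks created in the region are tracked via the group's count ... */
//   __kmpc_end_taskgroup( &loc, gtid );   // returns once taskgroup->count reaches 0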
1475
1476//------------------------------------------------------
1477// __kmp_remove_my_task: remove a task from my own deque
1478
1479static kmp_task_t *
1480__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1481 kmp_int32 is_constrained )
1482{
1483 kmp_task_t * task;
1484 kmp_taskdata_t * taskdata;
1485 kmp_thread_data_t *thread_data;
1486 kmp_uint32 tail;
1487
1488 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1489 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1490
1491 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1492
1493 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1494 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1495 thread_data->td.td_deque_tail) );
1496
1497 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1498 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1499 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1500 thread_data->td.td_deque_tail) );
1501 return NULL;
1502 }
1503
1504 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1505
1506 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1507 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1508 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1509 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1510 thread_data->td.td_deque_tail) );
1511 return NULL;
1512 }
1513
1514 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1515 taskdata = thread_data -> td.td_deque[ tail ];
1516
1517 if (is_constrained) {
1518 // we need to check if the candidate obeys task scheduling constraint:
1519 // only child of current task can be scheduled
1520 kmp_taskdata_t * current = thread->th.th_current_task;
1521 kmp_int32 level = current->td_level;
1522 kmp_taskdata_t * parent = taskdata->td_parent;
1523 while ( parent != current && parent->td_level > level ) {
1524 parent = parent->td_parent; // check generation up to the level of the current task
1525 KMP_DEBUG_ASSERT(parent != NULL);
1526 }
1527 if ( parent != current ) {
 1528 // If the tail task is not a child, then no other children can appear in the deque.
1529 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1530 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1531 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1532 thread_data->td.td_deque_tail) );
1533 return NULL;
1534 }
1535 }
1536
1537 thread_data -> td.td_deque_tail = tail;
1538 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1539
1540 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1541
1542 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1543 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1544 thread_data->td.td_deque_tail) );
1545
1546 task = KMP_TASKDATA_TO_TASK( taskdata );
1547 return task;
1548}
1549
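// Worked example of the wrap-around index arithmetic used above, assuming (as the
// masking implies) that TASK_DEQUE_SIZE is a power of two and
// TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1. If, say, TASK_DEQUE_SIZE were 256:
//
//   tail = ( 0 - 1 ) & 0xFF;   // == 255: decrementing past slot 0 wraps to the
//                              //         last slot of the ring buffer
//   tail = ( 5 - 1 ) & 0xFF;   // == 4:   the common, non-wrapping case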
1550
1551//-----------------------------------------------------------
1552// __kmp_steal_task: remove a task from another thread's deque
1553// Assume that calling thread has already checked existence of
1554// task_team thread_data before calling this routine.
1555
1556static kmp_task_t *
1557__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1558 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1559 kmp_int32 is_constrained )
1560{
1561 kmp_task_t * task;
1562 kmp_taskdata_t * taskdata;
1563 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001564 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001565
1566 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1567
1568 threads_data = task_team -> tt.tt_threads_data;
1569 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1570
1571 victim_tid = victim->th.th_info.ds.ds_tid;
1572 victim_td = & threads_data[ victim_tid ];
1573
1574 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1575 "head=%u tail=%u\n",
1576 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1577 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1578
1579 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1580 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1581 {
1582 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1583 "ntasks=%d head=%u tail=%u\n",
1584 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1585 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1586 return NULL;
1587 }
1588
1589 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1590
1591 // Check again after we acquire the lock
1592 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1593 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1594 {
1595 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1596 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1597 "ntasks=%d head=%u tail=%u\n",
1598 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1599 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1600 return NULL;
1601 }
1602
1603 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1604
1605 if ( !is_constrained ) {
1606 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1607 // Bump head pointer and Wrap.
1608 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1609 } else {
1610 // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1611 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1612 taskdata = victim_td -> td.td_deque[ tail ];
1613 // we need to check if the candidate obeys task scheduling constraint:
1614 // only child of current task can be scheduled
1615 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1616 kmp_int32 level = current->td_level;
1617 kmp_taskdata_t * parent = taskdata->td_parent;
1618 while ( parent != current && parent->td_level > level ) {
1619 parent = parent->td_parent; // check generation up to the level of the current task
1620 KMP_DEBUG_ASSERT(parent != NULL);
1621 }
1622 if ( parent != current ) {
 1623 // If the tail task is not a child, then no other children can appear in the deque (?).
1624 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1625 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1626 "ntasks=%d head=%u tail=%u\n",
1627 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1628 task_team, victim_td->td.td_deque_ntasks,
1629 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1630 return NULL;
1631 }
1632 victim_td -> td.td_deque_tail = tail;
1633 }
1634 if (*thread_finished) {
1635 // We need to un-mark this victim as a finished victim. This must be done before
1636 // releasing the lock, or else other threads (starting with the master victim)
1637 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001638 kmp_uint32 count;
1639
1640 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001641
1642 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1643 gtid, count + 1, task_team) );
1644
1645 *thread_finished = FALSE;
1646 }
1647 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1648
1649 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1650
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001651 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001652 "ntasks=%d head=%u tail=%u\n",
1653 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1654 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1655 victim_td->td.td_deque_tail) );
1656
1657 task = KMP_TASKDATA_TO_TASK( taskdata );
1658 return task;
1659}
1660
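// Summary of the stealing policy implemented above (a reading of the code, not new
// behavior): the owner pops from the tail of its own deque, while an unconstrained
// thief takes from the victim's head (the oldest tasks). Under the task scheduling
// constraint the thief also takes from the tail, since only a descendant of its
// current task may be scheduled:
//
//   head = ( head + 1 ) & TASK_DEQUE_MASK;   // unconstrained steal: bump head
//   tail = ( tail - 1 ) & TASK_DEQUE_MASK;   // owner pop / constrained steal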
1661
1662//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001663// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001664// is satisfied (return true) or there are none left (return false).
1665// final_spin is TRUE if this is the spin at the release barrier.
1666// thread_finished indicates whether the thread is finished executing all
1667// the tasks it has on its deque, and is at the release barrier.
 1668 // flag encapsulates the spin location and the value that terminates the spin
 1669 // (the former spinner/checker pair).
 1670 // flag == NULL means only execute a single task and return.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001671template <class C>
1672static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1673 int *thread_finished
1674 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001675{
1676 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001677 kmp_thread_data_t * threads_data;
1678 kmp_task_t * task;
1679 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1680 volatile kmp_uint32 * unfinished_threads;
1681 kmp_int32 nthreads, last_stolen, k, tid;
1682
1683 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1684 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1685
1686 task_team = thread -> th.th_task_team;
1687 KMP_DEBUG_ASSERT( task_team != NULL );
1688
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001689 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001690 gtid, final_spin, *thread_finished) );
1691
1692 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1693 KMP_DEBUG_ASSERT( threads_data != NULL );
1694
1695 nthreads = task_team -> tt.tt_nproc;
1696 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001697#if OMP_41_ENABLED
1698 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1699#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001701#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1703
1704 // Choose tasks from our own work queue.
1705 start:
1706 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1707#if USE_ITT_BUILD && USE_ITT_NOTIFY
1708 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1709 if ( itt_sync_obj == NULL ) {
1710 // we are at fork barrier where we could not get the object reliably
1711 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1712 }
1713 __kmp_itt_task_starting( itt_sync_obj );
1714 }
1715#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1716 __kmp_invoke_task( gtid, task, current_task );
1717#if USE_ITT_BUILD
1718 if ( itt_sync_obj != NULL )
1719 __kmp_itt_task_finished( itt_sync_obj );
1720#endif /* USE_ITT_BUILD */
1721
1722 // If this thread is only partway through the barrier and the condition
1723 // is met, then return now, so that the barrier gather/release pattern can proceed.
1724 // If this thread is in the last spin loop in the barrier, waiting to be
 1725 // released, we know that the termination condition will not be satisfied,
1726 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001727 if (flag == NULL || (!final_spin && flag->done_check())) {
1728 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001729 return TRUE;
1730 }
1731 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1732 }
1733
1734 // This thread's work queue is empty. If we are in the final spin loop
1735 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001736#if OMP_41_ENABLED
1737 // The work queue may be empty but there might be proxy tasks still executing
1738 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1739#else
1740 if (final_spin)
1741#endif
1742 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001743 // First, decrement the #unfinished threads, if that has not already
1744 // been done. This decrement might be to the spin location, and
1745 // result in the termination condition being satisfied.
1746 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001747 kmp_uint32 count;
1748
1749 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001750 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751 gtid, count, task_team) );
1752 *thread_finished = TRUE;
1753 }
1754
1755 // It is now unsafe to reference thread->th.th_team !!!
1756 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1757 // thread to pass through the barrier, where it might reset each thread's
1758 // th.th_team field for the next parallel region.
1759 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001760 if (flag != NULL && flag->done_check()) {
1761 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001762 return TRUE;
1763 }
1764 }
1765
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001766#if OMP_41_ENABLED
1767 // check if there are other threads to steal from, otherwise go back
1768 if ( nthreads == 1 )
1769 goto start;
1770#endif
1771
Jim Cownie5e8470a2013-09-27 10:38:44 +00001772 // Try to steal from the last place I stole from successfully.
1773 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1774 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1775
1776 if (last_stolen != -1) {
1777 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1778
1779 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1780 thread_finished, is_constrained )) != NULL)
1781 {
1782#if USE_ITT_BUILD && USE_ITT_NOTIFY
1783 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1784 if ( itt_sync_obj == NULL ) {
1785 // we are at fork barrier where we could not get the object reliably
1786 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1787 }
1788 __kmp_itt_task_starting( itt_sync_obj );
1789 }
1790#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1791 __kmp_invoke_task( gtid, task, current_task );
1792#if USE_ITT_BUILD
1793 if ( itt_sync_obj != NULL )
1794 __kmp_itt_task_finished( itt_sync_obj );
1795#endif /* USE_ITT_BUILD */
1796
1797 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798 if (flag == NULL || (!final_spin && flag->done_check())) {
1799 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001800 gtid) );
1801 return TRUE;
1802 }
1803
1804 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1805 // If the execution of the stolen task resulted in more tasks being
1806 // placed on our run queue, then restart the whole process.
1807 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001808 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001809 gtid) );
1810 goto start;
1811 }
1812 }
1813
1814 // Don't give priority to stealing from this thread anymore.
1815 threads_data[ tid ].td.td_deque_last_stolen = -1;
1816
 1817 // The victim's work queue is empty. If we are in the final spin loop
1818 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001819#if OMP_41_ENABLED
1820 // The work queue may be empty but there might be proxy tasks still executing
1821 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1822#else
1823 if (final_spin)
1824#endif
1825 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001826 // First, decrement the #unfinished threads, if that has not already
1827 // been done. This decrement might be to the spin location, and
1828 // result in the termination condition being satisfied.
1829 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001830 kmp_uint32 count;
1831
1832 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001833 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001834 "task_team=%p\n", gtid, count, task_team) );
1835 *thread_finished = TRUE;
1836 }
1837
1838 // If __kmp_tasking_mode != tskm_immediate_exec
1839 // then it is now unsafe to reference thread->th.th_team !!!
1840 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1841 // thread to pass through the barrier, where it might reset each thread's
1842 // th.th_team field for the next parallel region.
1843 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001844 if (flag != NULL && flag->done_check()) {
1845 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001846 gtid) );
1847 return TRUE;
1848 }
1849 }
1850 }
1851
1852 // Find a different thread to steal work from. Pick a random thread.
1853 // My initial plan was to cycle through all the threads, and only return
1854 // if we tried to steal from every thread, and failed. Arch says that's
1855 // not such a great idea.
1856 // GEH - need yield code in this loop for throughput library mode?
1857 new_victim:
1858 k = __kmp_get_random( thread ) % (nthreads - 1);
1859 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1860 ++k; // Adjusts random distribution to exclude self
1861 }
1862 {
1863 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1864 int first;
1865
1866 // There is a slight chance that __kmp_enable_tasking() did not wake up
 1867 // all threads waiting at the barrier. If this thread is sleeping, then
 1868 // wake it up. Since we were going to pay the cache miss penalty
 1869 // for referencing another thread's kmp_info_t struct anyway, the check
1870 // shouldn't cost too much performance at this point.
1871 // In extra barrier mode, tasks do not sleep at the separate tasking
1872 // barrier, so this isn't a problem.
1873 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1874 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1875 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1876 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001877 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001878 // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001879 // There is a slight possibility that it resumes, steals a task from
Jim Cownie5e8470a2013-09-27 10:38:44 +00001880 // another thread, which spawns more tasks, all in the time that it takes
1881 // this thread to check => don't write an assertion that the victim's
1882 // queue is empty. Try stealing from a different thread.
1883 goto new_victim;
1884 }
1885
1886 // Now try to steal work from the selected thread
1887 first = TRUE;
1888 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1889 thread_finished, is_constrained )) != NULL)
1890 {
1891#if USE_ITT_BUILD && USE_ITT_NOTIFY
1892 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1893 if ( itt_sync_obj == NULL ) {
1894 // we are at fork barrier where we could not get the object reliably
1895 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1896 }
1897 __kmp_itt_task_starting( itt_sync_obj );
1898 }
1899#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1900 __kmp_invoke_task( gtid, task, current_task );
1901#if USE_ITT_BUILD
1902 if ( itt_sync_obj != NULL )
1903 __kmp_itt_task_finished( itt_sync_obj );
1904#endif /* USE_ITT_BUILD */
1905
1906 // Try stealing from this victim again, in the future.
1907 if (first) {
1908 threads_data[ tid ].td.td_deque_last_stolen = k;
1909 first = FALSE;
1910 }
1911
1912 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001913 if (flag == NULL || (!final_spin && flag->done_check())) {
1914 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001915 gtid) );
1916 return TRUE;
1917 }
1918 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1919
1920 // If the execution of the stolen task resulted in more tasks being
1921 // placed on our run queue, then restart the whole process.
1922 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001923 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001924 gtid) );
1925 goto start;
1926 }
1927 }
1928
 1929 // The victim's work queue is empty. If we are in the final spin loop
1930 // of the barrier, check and see if the termination condition is satisfied.
1931 // Going on and finding a new victim to steal from is expensive, as it
1932 // involves a lot of cache misses, so we definitely want to re-check the
1933 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001934#if OMP_41_ENABLED
1935 // The work queue may be empty but there might be proxy tasks still executing
1936 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1937#else
1938 if (final_spin)
1939#endif
1940 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001941 // First, decrement the #unfinished threads, if that has not already
1942 // been done. This decrement might be to the spin location, and
1943 // result in the termination condition being satisfied.
1944 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001945 kmp_uint32 count;
1946
1947 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001948 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949 "task_team=%p\n",
1950 gtid, count, task_team) );
1951 *thread_finished = TRUE;
1952 }
1953
1954 // If __kmp_tasking_mode != tskm_immediate_exec,
1955 // then it is now unsafe to reference thread->th.th_team !!!
1956 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1957 // thread to pass through the barrier, where it might reset each thread's
1958 // th.th_team field for the next parallel region.
1959 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001960 if (flag != NULL && flag->done_check()) {
1961 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 return TRUE;
1963 }
1964 }
1965 }
1966
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001967 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001968 return FALSE;
1969}
1970
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001971int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1972 int *thread_finished
1973 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1974{
1975 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1976 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1977}
1978
1979int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1980 int *thread_finished
1981 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1982{
1983 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1984 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1985}
1986
1987int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1988 int *thread_finished
1989 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1990{
1991 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1992 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1993}
1994
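// Illustrative usage sketch (an assumption; the real callers live elsewhere in the
// runtime): a waiter wraps the location it spins on in the matching flag type and
// dispatches to the corresponding wrapper, mirroring the pattern used by
// __kmpc_omp_taskwait() above. The counter name and surrounding variables here are
// hypothetical:
//
//   kmp_flag_32 flag( &some_32bit_counter, 0U );   // "done" when counter reaches 0
//   int finished = FALSE;
//   __kmp_execute_tasks_32( this_thr, gtid, &flag, /*final_spin=*/ FALSE,
//                           &finished
//                           USE_ITT_BUILD_ARG(NULL), __kmp_task_stealing_constraint );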
1995
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996
1997//-----------------------------------------------------------------------------
1998// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1999// next barrier so they can assist in executing enqueued tasks.
2000// First thread in allocates the task team atomically.
2001
2002static void
2003__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2004{
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002005 kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006 kmp_thread_data_t *threads_data;
2007 int nthreads, i, is_init_thread;
2008
2009 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2010 __kmp_gtid_from_thread( this_thr ) ) );
2011
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002012 team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013 KMP_DEBUG_ASSERT(task_team != NULL);
2014 KMP_DEBUG_ASSERT(team != NULL);
2015
2016 nthreads = task_team->tt.tt_nproc;
2017 KMP_DEBUG_ASSERT(nthreads > 0);
2018 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
2019
2020 // Allocate or increase the size of threads_data if necessary
2021 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2022
2023 if (!is_init_thread) {
2024 // Some other thread already set up the array.
2025 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2026 __kmp_gtid_from_thread( this_thr ) ) );
2027 return;
2028 }
2029 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2030 KMP_DEBUG_ASSERT( threads_data != NULL );
2031
2032 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2033 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2034 {
2035 // Release any threads sleeping at the barrier, so that they can steal
2036 // tasks and execute them. In extra barrier mode, tasks do not sleep
2037 // at the separate tasking barrier, so this isn't a problem.
2038 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002039 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002040 kmp_info_t *thread = threads_data[i].td.td_thr;
2041
2042 if (i == this_thr->th.th_info.ds.ds_tid) {
2043 continue;
2044 }
2045 // Since we haven't locked the thread's suspend mutex lock at this
2046 // point, there is a small window where a thread might be putting
2047 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002048 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002049 // to see if other threads are sleeping (using the same random
2050 // mechanism that is used for task stealing) and awakens them if
2051 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002052 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053 {
2054 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2055 __kmp_gtid_from_thread( this_thr ),
2056 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002057 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002058 }
2059 else {
2060 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2061 __kmp_gtid_from_thread( this_thr ),
2062 __kmp_gtid_from_thread( thread ) ) );
2063 }
2064 }
2065 }
2066
2067 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2068 __kmp_gtid_from_thread( this_thr ) ) );
2069}
2070
2071
2072/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002073/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002074 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2075 * like a shadow of the kmp_team_t data struct, with a different lifetime.
 2076 * After a child thread checks into a barrier and calls __kmp_release() from
2077 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2078 * longer assume that the kmp_team_t structure is intact (at any moment, the
2079 * master thread may exit the barrier code and free the team data structure,
2080 * and return the threads to the thread pool).
2081 *
 2082 * This does not work with the tasking code, as the thread is still
 2083 * expected to participate in the execution of any tasks that may have been
 2084 * spawned by a member of the team, and the thread still needs access to
 2085 * each thread in the team, so that it can steal work from it.
2086 *
2087 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 2088 * counting mechanism, and is allocated by the master thread before calling
 2089 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2090 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2091 * of the kmp_task_team_t structs for consecutive barriers can overlap
2092 * (and will, unless the master thread is the last thread to exit the barrier
2093 * release phase, which is not typical).
2094 *
2095 * The existence of such a struct is useful outside the context of tasking,
2096 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2097 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2098 * libraries.
2099 *
2100 * We currently use the existence of the threads array as an indicator that
2101 * tasks were spawned since the last barrier. If the structure is to be
2102 * useful outside the context of tasking, then this will have to change, but
 2103 * not setting the field minimizes the performance impact of tasking on
2104 * barriers, when no explicit tasks were spawned (pushed, actually).
2105 */
2106
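// Minimal sketch of the reference-counting lifecycle described above, using the
// routines defined below in this section (a summary, not new code):
//
//   task_team = __kmp_allocate_task_team( thr, team );   // tt_ref_ct starts at nthreads - 1
//   ...
//   __kmp_unref_task_team( task_team, thr );   // each thread drops its reference;
//                                              // the last unref (tt_ref_ct == 0)
//                                              // calls __kmp_free_task_team(),
//                                              // returning the struct to the
//                                              // __kmp_free_task_teams free list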
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002107
Jim Cownie5e8470a2013-09-27 10:38:44 +00002108static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2109// Lock for task team data structures
2110static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2111
2112
2113//------------------------------------------------------------------------------
2114// __kmp_alloc_task_deque:
2115// Allocates a task deque for a particular thread, and initialize the necessary
2116// data structures relating to the deque. This only happens once per thread
2117// per task team since task teams are recycled.
2118// No lock is needed during allocation since each thread allocates its own
2119// deque.
2120
2121static void
2122__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2123{
2124 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2125 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2126
2127 // Initialize last stolen task field to "none"
2128 thread_data -> td.td_deque_last_stolen = -1;
2129
2130 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2131 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2132 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2133
2134 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2135 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2136 // Allocate space for task deque, and zero the deque
2137 // Cannot use __kmp_thread_calloc() because threads not around for
2138 // kmp_reap_task_team( ).
2139 thread_data -> td.td_deque = (kmp_taskdata_t **)
2140 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2141}
2142
2143
2144//------------------------------------------------------------------------------
2145// __kmp_free_task_deque:
2146// Deallocates a task deque for a particular thread.
2147// Happens at library deallocation so don't need to reset all thread data fields.
2148
2149static void
2150__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2151{
2152 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2153
2154 if ( thread_data -> td.td_deque != NULL ) {
2155 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2156 __kmp_free( thread_data -> td.td_deque );
2157 thread_data -> td.td_deque = NULL;
2158 }
2159 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2160
2161#ifdef BUILD_TIED_TASK_STACK
2162 // GEH: Figure out what to do here for td_susp_tied_tasks
2163 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2164 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2165 }
2166#endif // BUILD_TIED_TASK_STACK
2167}
2168
2169
2170//------------------------------------------------------------------------------
2171// __kmp_realloc_task_threads_data:
2172// Allocates a threads_data array for a task team, either by allocating an initial
2173// array or enlarging an existing array. Only the first thread to get the lock
 2174 // allocates or enlarges the array and re-initializes the array elements.
2175// That thread returns "TRUE", the rest return "FALSE".
2176// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2177// The current size is given by task_team -> tt.tt_max_threads.
2178
2179static int
2180__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2181{
2182 kmp_thread_data_t ** threads_data_p;
2183 kmp_int32 nthreads, maxthreads;
2184 int is_init_thread = FALSE;
2185
2186 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2187 // Already reallocated and initialized.
2188 return FALSE;
2189 }
2190
2191 threads_data_p = & task_team -> tt.tt_threads_data;
2192 nthreads = task_team -> tt.tt_nproc;
2193 maxthreads = task_team -> tt.tt_max_threads;
2194
2195 // All threads must lock when they encounter the first task of the implicit task
2196 // region to make sure threads_data fields are (re)initialized before used.
2197 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2198
2199 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2200 // first thread to enable tasking
2201 kmp_team_t *team = thread -> th.th_team;
2202 int i;
2203
2204 is_init_thread = TRUE;
2205 if ( maxthreads < nthreads ) {
2206
2207 if ( *threads_data_p != NULL ) {
2208 kmp_thread_data_t *old_data = *threads_data_p;
2209 kmp_thread_data_t *new_data = NULL;
2210
2211 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2212 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2213 __kmp_gtid_from_thread( thread ), task_team,
2214 nthreads, maxthreads ) );
2215 // Reallocate threads_data to have more elements than current array
2216 // Cannot use __kmp_thread_realloc() because threads not around for
2217 // kmp_reap_task_team( ). Note all new array entries are initialized
2218 // to zero by __kmp_allocate().
2219 new_data = (kmp_thread_data_t *)
2220 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2221 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002222 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002223 (void *) old_data,
 2224 maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002225
2226#ifdef BUILD_TIED_TASK_STACK
2227 // GEH: Figure out if this is the right thing to do
2228 for (i = maxthreads; i < nthreads; i++) {
2229 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2230 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2231 }
2232#endif // BUILD_TIED_TASK_STACK
2233 // Install the new data and free the old data
2234 (*threads_data_p) = new_data;
2235 __kmp_free( old_data );
2236 }
2237 else {
2238 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2239 "threads data for task_team %p, size = %d\n",
2240 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2241 // Make the initial allocate for threads_data array, and zero entries
2242 // Cannot use __kmp_thread_calloc() because threads not around for
2243 // kmp_reap_task_team( ).
2244 *threads_data_p = (kmp_thread_data_t *)
2245 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2246#ifdef BUILD_TIED_TASK_STACK
2247 // GEH: Figure out if this is the right thing to do
2248 for (i = 0; i < nthreads; i++) {
2249 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2250 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2251 }
2252#endif // BUILD_TIED_TASK_STACK
2253 }
2254 task_team -> tt.tt_max_threads = nthreads;
2255 }
2256 else {
2257 // If array has (more than) enough elements, go ahead and use it
2258 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2259 }
2260
2261 // initialize threads_data pointers back to thread_info structures
2262 for (i = 0; i < nthreads; i++) {
2263 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2264 thread_data -> td.td_thr = team -> t.t_threads[i];
2265
2266 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2267 // The last stolen field survives across teams / barrier, and the number
2268 // of threads may have changed. It's possible (likely?) that a new
2269 // parallel region will exhibit the same behavior as the previous region.
2270 thread_data -> td.td_deque_last_stolen = -1;
2271 }
2272 }
2273
2274 KMP_MB();
2275 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2276 }
2277
2278 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2279 return is_init_thread;
2280}
2281
2282
2283//------------------------------------------------------------------------------
2284// __kmp_free_task_threads_data:
2285// Deallocates a threads_data array for a task team, including any attached
2286// tasking deques. Only occurs at library shutdown.
2287
2288static void
2289__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2290{
2291 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2292 if ( task_team -> tt.tt_threads_data != NULL ) {
2293 int i;
2294 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2295 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2296 }
2297 __kmp_free( task_team -> tt.tt_threads_data );
2298 task_team -> tt.tt_threads_data = NULL;
2299 }
2300 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2301}
2302
2303
2304//------------------------------------------------------------------------------
2305// __kmp_allocate_task_team:
2306// Allocates a task team associated with a specific team, taking it from
2307// the global task team free list if possible. Also initializes data structures.
2308
2309static kmp_task_team_t *
2310__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2311{
2312 kmp_task_team_t *task_team = NULL;
2313 int nthreads;
2314
2315 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2316 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2317
2318 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2319 // Take a task team from the task team pool
2320 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2321 if (__kmp_free_task_teams != NULL) {
2322 task_team = __kmp_free_task_teams;
2323 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2324 task_team -> tt.tt_next = NULL;
2325 }
2326 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2327 }
2328
2329 if (task_team == NULL) {
2330 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2331 "task team for team %p\n",
2332 __kmp_gtid_from_thread( thread ), team ) );
2333 // Allocate a new task team if one is not available.
2334 // Cannot use __kmp_thread_malloc() because threads not around for
2335 // kmp_reap_task_team( ).
2336 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2337 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2338 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2339 //task_team -> tt.tt_max_threads = 0;
2340 //task_team -> tt.tt_next = NULL;
2341 }
2342
2343 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002344#if OMP_41_ENABLED
2345 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2346#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2348
Jim Cownie5e8470a2013-09-27 10:38:44 +00002349 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2350 TCW_4( task_team -> tt.tt_active, TRUE );
2351 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2352
2353 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2354 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2355 return task_team;
2356}
2357
2358
2359//------------------------------------------------------------------------------
2360// __kmp_free_task_team:
2361// Frees the task team associated with a specific thread, and adds it
2362// to the global task team free list.
2363//
2364
2365static void
2366__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2367{
2368 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2369 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2370
2371 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2372
2373 // Put task team back on free list
2374 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2375
2376 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2377 task_team -> tt.tt_next = __kmp_free_task_teams;
2378 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2379 TCW_PTR(__kmp_free_task_teams, task_team);
2380
2381 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2382}
2383
2384
2385//------------------------------------------------------------------------------
2386// __kmp_reap_task_teams:
2387// Free all the task teams on the task team free list.
2388// Should only be done during library shutdown.
2389// Cannot do anything that needs a thread structure or gtid since they are already gone.
2390
2391void
2392__kmp_reap_task_teams( void )
2393{
2394 kmp_task_team_t *task_team;
2395
2396 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2397 // Free all task_teams on the free list
2398 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2399 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2400 __kmp_free_task_teams = task_team -> tt.tt_next;
2401 task_team -> tt.tt_next = NULL;
2402
2403 // Free threads_data if necessary
2404 if ( task_team -> tt.tt_threads_data != NULL ) {
2405 __kmp_free_task_threads_data( task_team );
2406 }
2407 __kmp_free( task_team );
2408 }
2409 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2410 }
2411}
2412
2413
2414//------------------------------------------------------------------------------
 2415// __kmp_unref_task_team:
 2416// Remove one thread from referencing the task team structure by
 2417// decreasing the reference count, and deallocate the task team if there are
 2418// no more references to it.
2419//
2420void
2421__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2422{
2423 kmp_uint ref_ct;
2424
2425 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2426
2427 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2428 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2429
2430
2431 if ( ref_ct == 0 ) {
2432 __kmp_free_task_team( thread, task_team );
2433 }
2434
2435 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2436}
2437
2438
2439//------------------------------------------------------------------------------
2440// __kmp_wait_to_unref_task_teams:
2441// Some threads could still be in the fork barrier release code, possibly
2442// trying to steal tasks. Wait for each thread to unreference its task team.
2443//
2444void
2445__kmp_wait_to_unref_task_teams(void)
2446{
2447 kmp_info_t *thread;
2448 kmp_uint32 spins;
2449 int done;
2450
2451 KMP_INIT_YIELD( spins );
2452
2453
2454 for (;;) {
2455 done = TRUE;
2456
 2457 // TODO: GEH - this may be wrong because some sync would be necessary
2458 // in case threads are added to the pool during the traversal.
2459 // Need to verify that lock for thread pool is held when calling
2460 // this routine.
2461 for (thread = (kmp_info_t *)__kmp_thread_pool;
2462 thread != NULL;
2463 thread = thread->th.th_next_pool)
2464 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002465#if KMP_OS_WINDOWS
2466 DWORD exit_val;
2467#endif
2468 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2469 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2470 __kmp_gtid_from_thread( thread ) ) );
2471 continue;
2472 }
2473#if KMP_OS_WINDOWS
2474 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2475 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2476 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2477 __kmp_unref_task_team( thread->th.th_task_team, thread );
2478 }
2479 continue;
2480 }
2481#endif
2482
2483 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2484
2485 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2486 __kmp_gtid_from_thread( thread ) ) );
2487
2488 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002489 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002490 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002491 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002492 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2493 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002494 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002495 }
2496 }
2497 }
2498 if (done) {
2499 break;
2500 }
2501
2502 // If we are oversubscribed,
2503 // or have waited a bit (and library mode is throughput), yield.
2504 // Pause is in the following code.
2505 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2506 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2507 }
2508
2509
2510}
2511
2512
2513//------------------------------------------------------------------------------
2514// __kmp_task_team_setup: Create a task_team for the current team, but use
2515// an already created, unused one if it already exists.
2516// This may be called by any thread, but only for teams with # threads >1.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002517void
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002518__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002519{
2520 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2521
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002522 if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002523 // Allocate a new task team, which will be propagated to
2524 // all of the worker threads after the barrier. As they
 2525 // spin in the barrier release phase, they will continue
2526 // to use the previous task team struct, until they receive
2527 // the signal to stop checking for tasks (they can't safely
2528 // reference the kmp_team_t struct, which could be reallocated
2529 // by the master thread).
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002530 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2531 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2532 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie5e8470a2013-09-27 10:38:44 +00002533 ((team != NULL) ? team->t.t_id : -1)) );
2534 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002535 //else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002536 // All threads have reported in, and no tasks were spawned
2537 // for this release->gather region. Leave the old task
2538 // team struct in place for the upcoming region. No task
2539 // teams are formed for serialized teams.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002540 if (both) {
2541 int other_team = 1 - this_thr->th.th_task_state;
2542 if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2543 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2544 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2545 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2546 ((team != NULL) ? team->t.t_id : -1)) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002547 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002548 }
2549}
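// A hypothetical call-site sketch (not compiled): the real callers live in the fork and
// barrier paths, which are not shown here.  It only illustrates how the "both"/"always"
// parameters of __kmp_task_team_setup above are meant to be used; "example_region_setup"
// is an illustrative name, not part of the runtime.
#if 0
static void example_region_setup( kmp_info_t *master, kmp_team_t *team )
{
    // Set up the slot for the master's current th_task_state and, with "both" set,
    // the second slot as well, before the workers are released.
    __kmp_task_team_setup( master, team, /* both = */ 1, /* always = */ 0 );

    // The slot the upcoming region will use:
    kmp_task_team_t *tt = team->t.t_task_team[ master->th.th_task_state ];
    (void) tt;
}
#endif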
2550
2551
2552//------------------------------------------------------------------------------
2553// __kmp_task_team_sync: Propagation of task team data from team to threads
2554// which happens just after the release phase of a team barrier. This may be
2555// called by any thread, but only for teams with # threads > 1.
2556
2557void
2558__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2559{
2560 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2561
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002562 // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002563 if ( this_thr->th.th_task_team != NULL ) {
2564 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2565 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2566 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002567 } else { // We are re-using a task team that was never enabled.
2568 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002569 }
2570 }
2571
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002572 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002573 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002574 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2575 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002576 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2577 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2578 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2579}
2580
2581
2582//------------------------------------------------------------------------------
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002583// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002584// barrier gather phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created
Jim Cownie5e8470a2013-09-27 10:38:44 +00002585void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002586__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002587 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002588 )
2589{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002590 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002591
2592 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2593 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2594
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002595 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2597 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002598 // All worker threads might have dropped through to the release phase, but could still
2599 // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2600 // only the master thread checks for the termination condition.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002601 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2602 flag.wait(this_thr, TRUE
2603 USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002604
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002605 // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2606 // They will deallocate it when the reference count reaches zero.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002607 // The master thread is not included in the ref count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002608 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2609 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002610#if OMP_41_ENABLED
2611 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2612 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2613#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002614 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002615#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002616 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2617 KMP_MB();
2618
2619 TCW_PTR(this_thr->th.th_task_team, NULL);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002620 team->t.t_task_team[this_thr->th.th_task_state] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621 }
2622}
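// Conceptually, the flag wait above amounts to the loop below (illustration only, not
// compiled): spin until the counter of unfinished threads on the task team reaches zero.
// The real wait can additionally execute tasks, sleep, and issue ITT notifications.
#if 0
static void example_wait_unfinished_threads( kmp_task_team_t *task_team )
{
    while ( TCR_4( task_team->tt.tt_unfinished_threads ) != 0 ) {
        KMP_YIELD( TRUE );  // let the remaining workers finish their tasks
    }
}
#endif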
2623
2624
2625//------------------------------------------------------------------------------
2626// __kmp_tasking_barrier:
2627// Internal function to execute all tasks prior to a regular barrier or a
2628// join barrier. It is a full barrier itself, which unfortunately turns
2629// regular barriers into double barriers and join barriers into 1 1/2
2630// barriers.
2631// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2632
2633void
2634__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2635{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002636 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637 int flag = FALSE;
2638 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2639
2640#if USE_ITT_BUILD
2641 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2642#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002643 kmp_flag_32 spin_flag(spin, 0U);
2644 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2645 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002646#if USE_ITT_BUILD
2647 // TODO: What about itt_sync_obj??
2648 KMP_FSYNC_SPIN_PREPARE( spin );
2649#endif /* USE_ITT_BUILD */
2650
2651 if( TCR_4(__kmp_global.g.g_done) ) {
2652 if( __kmp_global.g.g_abort )
2653 __kmp_abort_thread( );
2654 break;
2655 }
2656 KMP_YIELD( TRUE ); // GH: We always yield here
2657 }
2658#if USE_ITT_BUILD
2659 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2660#endif /* USE_ITT_BUILD */
2661}
2662
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002663
2664#if OMP_41_ENABLED
2665
2666/* __kmp_give_task puts a task into a given thread's queue if:
2667    - the queue for that thread was created
2668 - there's space in that queue
2669
2670 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2671 */
2672static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2673{
2674 kmp_task_team_t * task_team = thread->th.th_task_team;
2675 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2676 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2677 bool result = false;
2678
2679 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2680
2681 // assert tasking is enabled? what if not?
2682 KMP_DEBUG_ASSERT( task_team != NULL );
2683
2684 if (thread_data -> td.td_deque == NULL ) {
2685 // There's no queue in this thread, go find another one
2686 // We're guaranteed that at least one thread has a queue
2687 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2688 return result;
2689 }
2690
2691 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2692 {
2693 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2694 return result;
2695 }
2696
2697 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2698
2699 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2700 {
2701 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2702 goto release_and_exit;
2703 }
2704
2705 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2706 // Wrap index.
2707 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2708 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2709
2710 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002711 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002712
2713release_and_exit:
2714 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2715
2716 return result;
2717}
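// Because __kmp_give_task can fail (the target thread has no deque yet, or its deque is
// full), callers retry with another thread.  The out-of-team completion path further below
// does exactly this; the sketch here (not compiled, illustrative name) shows the bare pattern.
#if 0
static void example_give_to_some_thread( kmp_team_t *team, kmp_task_t *task )
{
    kmp_int32 k = 0;
    // Keep probing threads of the team until one accepts the task.
    while ( ! __kmp_give_task( team->t.t_threads[ k ], k, task ) ) {
        k = ( k + 1 ) % team->t.t_nproc;
    }
}
#endif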
2718
2719
2720/* The finish of a proxy task is divided into two pieces:
2721    - the top half is the part that can be done from a thread outside the team
2722    - the bottom half must be run from a thread within the team
2723
2724    In order to run the bottom half, the task gets queued back into one of the threads of the team.
2725    Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2726    So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
2727    - things that can be run before queuing the bottom half
2728    - things that must be run after queuing the bottom half
2729
2730    This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this,
2731    we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2732*/
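// Ordering sketch of the scheme described above (not compiled; it simply strings together
// the helpers defined below).  The imaginary child added by the first top half keeps the
// proxy task alive until the second top half has run, even if the bottom half executes
// concurrently on a team thread.
#if 0
static void example_out_of_team_ordering( kmp_task_t *ptask )
{
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA( ptask );

    __kmp_first_top_half_finish_proxy( taskdata );   // must happen before queuing the bottom half

    /* ... queue the bottom half to some thread of the team, e.g. via __kmp_give_task ... */

    __kmp_second_top_half_finish_proxy( taskdata );  // must happen after queuing; drops the imaginary child
}
#endif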
2733
2734static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2735{
2736 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2737 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2738 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2739 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2740
2741 taskdata -> td_flags.complete = 1; // mark the task as completed
2742
2743 if ( taskdata->td_taskgroup )
2744 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2745
2746    // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2747 TCR_4(taskdata->td_incomplete_child_tasks++);
2748}
2749
2750static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2751{
2752 kmp_int32 children = 0;
2753
2754 // Predecrement simulated by "- 1" calculation
2755 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2756 KMP_DEBUG_ASSERT( children >= 0 );
2757
2758    // Remove the imaginary child
2759 TCR_4(taskdata->td_incomplete_child_tasks--);
2760}
2761
2762static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2763{
2764 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2765 kmp_info_t * thread = __kmp_threads[ gtid ];
2766
2767 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2768 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2769
2770 // We need to wait to make sure the top half is finished
2771 // Spinning here should be ok as this should happen quickly
2772 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2773
2774 __kmp_release_deps(gtid,taskdata);
2775 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2776}
2777
2778/*!
2779@ingroup TASKING
2780@param gtid Global Thread ID of encountering thread
2781@param ptask Task whose execution is completed
2782
2783Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
2784*/
2785void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2786{
2787 KMP_DEBUG_ASSERT( ptask != NULL );
2788 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2789 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2790
2791 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2792
2793 __kmp_first_top_half_finish_proxy(taskdata);
2794 __kmp_second_top_half_finish_proxy(taskdata);
2795 __kmp_bottom_half_finish_proxy(gtid,ptask);
2796
2797 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2798}
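// Hypothetical usage sketch (not compiled): once the asynchronous work a proxy task stands
// for has finished, a thread known to be part of the team can report completion directly.
// "example_signal_completion_in_team" and the waiting step are illustrative, not runtime API.
#if 0
void example_signal_completion_in_team( kmp_int32 gtid, kmp_task_t *proxy )
{
    /* ... wait until the asynchronous work represented by the proxy task is done ... */
    __kmpc_proxy_task_completed( gtid, proxy );
}
#endif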
2799
2800/*!
2801@ingroup TASKING
2802@param ptask Task whose execution is completed
2803
2804Execute the completion of a proxy task from a thread that might not belong to the team.
2805*/
2806void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2807{
2808 KMP_DEBUG_ASSERT( ptask != NULL );
2809 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2810
2811 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2812
2813 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2814
2815 __kmp_first_top_half_finish_proxy(taskdata);
2816
2817    // Enqueue task to complete bottom half completion from a thread within the corresponding team
2818 kmp_team_t * team = taskdata->td_team;
2819 kmp_int32 nthreads = team->t.t_nproc;
2820 kmp_info_t *thread;
2821 kmp_int32 k = 0;
2822
2823 do {
Jonathan Peyton1406f012015-05-22 22:35:51 +00002824 //This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002825 //For now we're just linearly trying to find a thread
2826 k = (k+1) % nthreads;
2827 thread = team->t.t_threads[k];
2828 } while ( !__kmp_give_task( thread, k, ptask ) );
2829
2830 __kmp_second_top_half_finish_proxy(taskdata);
2831
2832 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2833}
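// Hypothetical usage sketch (not compiled): a thread that is not part of the team, for
// example a device runtime callback thread, reports completion; the function above then
// queues the bottom half back to a team thread.  "offload_completion_callback" is an
// illustrative name, not an existing interface.
#if 0
void offload_completion_callback( void *data )
{
    kmp_task_t *proxy = (kmp_task_t *) data;
    __kmpc_proxy_task_completed_ooo( proxy );
}
#endif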
2834
2835#endif