Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * kmp_tasking.c -- OpenMP 3.0 tasking support.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
Jim Cownie4cc4bb42014-10-07 16:25:50 +000019#include "kmp_wait_release.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000020
Andrey Churbanove5f44922015-04-29 16:22:07 +000021#if OMPT_SUPPORT
22#include "ompt-specific.h"
23#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000024
Jim Cownie5e8470a2013-09-27 10:38:44 +000025
26/* ------------------------------------------------------------------------ */
27/* ------------------------------------------------------------------------ */
28
29
30/* forward declaration */
31static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
32static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
33static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
34
Jim Cownie4cc4bb42014-10-07 16:25:50 +000035static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
36 switch (((kmp_flag_64 *)flag)->get_type()) {
37 case flag32: __kmp_resume_32(gtid, NULL); break;
38 case flag64: __kmp_resume_64(gtid, NULL); break;
39 case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
40 }
Jim Cownie5e8470a2013-09-27 10:38:44 +000041}
42
43#ifdef BUILD_TIED_TASK_STACK
44
45//---------------------------------------------------------------------------
46// __kmp_trace_task_stack: print the tied tasks from the task stack in order
 47// from top to bottom
48//
49// gtid: global thread identifier for thread containing stack
50// thread_data: thread data for task team thread containing stack
51// threshold: value above which the trace statement triggers
52// location: string identifying call site of this function (for trace)
53
54static void
55__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
56{
57 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
58 kmp_taskdata_t **stack_top = task_stack -> ts_top;
59 kmp_int32 entries = task_stack -> ts_entries;
60 kmp_taskdata_t *tied_task;
61
62 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
63 "first_block = %p, stack_top = %p \n",
64 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
65
66 KMP_DEBUG_ASSERT( stack_top != NULL );
67 KMP_DEBUG_ASSERT( entries > 0 );
68
69 while ( entries != 0 )
70 {
71 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
72 // fix up ts_top if we need to pop from previous block
 73 if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
74 {
75 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
76
77 stack_block = stack_block -> sb_prev;
78 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
79 }
80
81 // finish bookkeeping
82 stack_top--;
83 entries--;
84
85 tied_task = * stack_top;
86
87 KMP_DEBUG_ASSERT( tied_task != NULL );
88 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
89
90 KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
91 "stack_top=%p, tied_task=%p\n",
92 location, gtid, entries, stack_top, tied_task ) );
93 }
94 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
95
96 KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
97 location, gtid ) );
98}
99
100//---------------------------------------------------------------------------
101// __kmp_init_task_stack: initialize the task stack for the first time
102// after a thread_data structure is created.
103// It should not be necessary to do this again (assuming the stack works).
104//
105// gtid: global thread identifier of calling thread
106// thread_data: thread data for task team thread containing stack
107
108static void
109__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
110{
111 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
112 kmp_stack_block_t *first_block;
113
114 // set up the first block of the stack
115 first_block = & task_stack -> ts_first_block;
116 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
117 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
118
119 // initialize the stack to be empty
120 task_stack -> ts_entries = TASK_STACK_EMPTY;
121 first_block -> sb_next = NULL;
122 first_block -> sb_prev = NULL;
123}
124
125
126//---------------------------------------------------------------------------
127// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
128//
129// gtid: global thread identifier for calling thread
130// thread_data: thread info for thread containing stack
131
132static void
133__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
134{
135 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
136 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
137
138 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
139 // free from the second block of the stack
140 while ( stack_block != NULL ) {
141 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
142
143 stack_block -> sb_next = NULL;
144 stack_block -> sb_prev = NULL;
145 if (stack_block != & task_stack -> ts_first_block) {
 146 __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
147 }
148 stack_block = next_block;
149 }
150 // initialize the stack to be empty
151 task_stack -> ts_entries = 0;
152 task_stack -> ts_top = NULL;
153}
154
155
156//---------------------------------------------------------------------------
157// __kmp_push_task_stack: Push the tied task onto the task stack.
158// Grow the stack if necessary by allocating another block.
159//
160// gtid: global thread identifier for calling thread
161// thread: thread info for thread containing stack
162// tied_task: the task to push on the stack
163
164static void
165__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
166{
167 // GEH - need to consider what to do if tt_threads_data not allocated yet
168 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
169 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
170 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
171
172 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
173 return; // Don't push anything on stack if team or team tasks are serialized
174 }
175
176 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
177 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
178
179 KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
180 gtid, thread, tied_task ) );
181 // Store entry
182 * (task_stack -> ts_top) = tied_task;
183
184 // Do bookkeeping for next push
185 task_stack -> ts_top++;
186 task_stack -> ts_entries++;
187
 188 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
189 {
190 // Find beginning of this task block
191 kmp_stack_block_t *stack_block =
192 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
193
194 // Check if we already have a block
195 if ( stack_block -> sb_next != NULL )
196 { // reset ts_top to beginning of next block
197 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
198 }
199 else
200 { // Alloc new block and link it up
201 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
202 __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
203
204 task_stack -> ts_top = & new_block -> sb_block[0];
205 stack_block -> sb_next = new_block;
206 new_block -> sb_prev = stack_block;
207 new_block -> sb_next = NULL;
208
209 KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
210 gtid, tied_task, new_block ) );
211 }
212 }
213 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
214}
215
216//---------------------------------------------------------------------------
217// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
218// the task, just check to make sure it matches the ending task passed in.
219//
220// gtid: global thread identifier for the calling thread
221// thread: thread info structure containing stack
222// tied_task: the task popped off the stack
223// ending_task: the task that is ending (should match popped task)
224
225static void
226__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
227{
228 // GEH - need to consider what to do if tt_threads_data not allocated yet
 229 kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
230 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
231 kmp_taskdata_t *tied_task;
232
233 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
234 return; // Don't pop anything from stack if team or team tasks are serialized
235 }
236
237 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
238 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
239
240 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
241
242 // fix up ts_top if we need to pop from previous block
 243 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
244 {
245 kmp_stack_block_t *stack_block =
246 (kmp_stack_block_t *) (task_stack -> ts_top) ;
247
248 stack_block = stack_block -> sb_prev;
249 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
250 }
251
252 // finish bookkeeping
253 task_stack -> ts_top--;
254 task_stack -> ts_entries--;
255
256 tied_task = * (task_stack -> ts_top );
257
258 KMP_DEBUG_ASSERT( tied_task != NULL );
259 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
260 KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
261
262 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
263 return;
264}
265#endif /* BUILD_TIED_TASK_STACK */
266
267//---------------------------------------------------
268// __kmp_push_task: Add a task to the thread's deque
269
270static kmp_int32
271__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
272{
273 kmp_info_t * thread = __kmp_threads[ gtid ];
274 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
275 kmp_task_team_t * task_team = thread->th.th_task_team;
276 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
277 kmp_thread_data_t * thread_data;
278
279 KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
280
281 // The first check avoids building task_team thread data if serialized
282 if ( taskdata->td_flags.task_serial ) {
283 KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
284 gtid, taskdata ) );
285 return TASK_NOT_PUSHED;
286 }
287
288 // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
289 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
Andrey Churbanov6d224db2015-02-10 18:37:43 +0000290 if ( ! KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000291 __kmp_enable_tasking( task_team, thread );
292 }
293 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
294 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
295
296 // Find tasking deque specific to encountering thread
297 thread_data = & task_team -> tt.tt_threads_data[ tid ];
298
299 // No lock needed since only owner can allocate
300 if (thread_data -> td.td_deque == NULL ) {
301 __kmp_alloc_task_deque( thread, thread_data );
302 }
303
304 // Check if deque is full
305 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
306 {
307 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
308 gtid, taskdata ) );
309 return TASK_NOT_PUSHED;
310 }
311
312 // Lock the deque for the task push operation
313 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
314
 315 // Must have room since only the calling thread can add tasks to this deque
316 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
317
318 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
319 // Wrap index.
320 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
321 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
322
323 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
324
325 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
326 "task=%p ntasks=%d head=%u tail=%u\n",
327 gtid, taskdata, thread_data->td.td_deque_ntasks,
328 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
329
330 return TASK_SUCCESSFULLY_PUSHED;
331}
332
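// A minimal sketch (kept out of compilation) of the ring-buffer arithmetic used by the
// task deque above.  It assumes TASK_DEQUE_SIZE is a power of two with TASK_DEQUE_MASK ==
// TASK_DEQUE_SIZE - 1, which is what the "& TASK_DEQUE_MASK" wrap in __kmp_push_task,
// __kmp_remove_my_task and __kmp_steal_task (below) relies on.  The local names are hypothetical.
#if 0
static void
__example_deque_indexing( void )
{
    kmp_uint32 head = 0, tail = 0, ntasks = 0;    // empty deque: head == tail

    // owner pushes at the tail (as __kmp_push_task does)
    if ( ntasks < TASK_DEQUE_SIZE ) {
        tail = ( tail + 1 ) & TASK_DEQUE_MASK;    // wrap index
        ntasks++;
    }
    // owner pops from the tail, LIFO (as __kmp_remove_my_task does)
    if ( ntasks > 0 ) {
        tail = ( tail - 1 ) & TASK_DEQUE_MASK;    // wrap index; unsigned underflow wraps correctly
        ntasks--;
    }
    // a thief instead takes from the head, FIFO (see __kmp_steal_task below)
    (void) head;
}
#endif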
333
334//-----------------------------------------------------------------------------------------
 335// __kmp_pop_current_task_from_thread: pop the current task of the given thread back to its parent when the team ends
336// this_thr: thread structure to set current_task in.
337
338void
339__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
340{
341 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
342 "curtask_parent=%p\n",
343 0, this_thr, this_thr -> th.th_current_task,
344 this_thr -> th.th_current_task -> td_parent ) );
345
346 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
347
348 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
349 "curtask_parent=%p\n",
350 0, this_thr, this_thr -> th.th_current_task,
351 this_thr -> th.th_current_task -> td_parent ) );
352}
353
354
355//---------------------------------------------------------------------------------------
356// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
357// this_thr: thread structure to set up
358// team: team for implicit task data
359// tid: thread within team to set up
360
361void
362__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
363{
 364 // The current task of this thread is the parent of the newly created implicit tasks of the new team
365 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
366 "parent_task=%p\n",
367 tid, this_thr, this_thr->th.th_current_task,
368 team->t.t_implicit_task_taskdata[tid].td_parent ) );
369
370 KMP_DEBUG_ASSERT (this_thr != NULL);
371
372 if( tid == 0 ) {
373 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
374 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
375 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
376 }
377 } else {
378 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
379 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
380 }
381
382 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
383 "parent_task=%p\n",
384 tid, this_thr, this_thr->th.th_current_task,
385 team->t.t_implicit_task_taskdata[tid].td_parent ) );
386}
387
388
389//----------------------------------------------------------------------
390// __kmp_task_start: bookkeeping for a task starting execution
391// GTID: global thread id of calling thread
392// task: task starting execution
393// current_task: task suspending
394
395static void
396__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
397{
398 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
399 kmp_info_t * thread = __kmp_threads[ gtid ];
400
401 KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
402 gtid, taskdata, current_task) );
403
404 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
405
406 // mark currently executing task as suspended
407 // TODO: GEH - make sure root team implicit task is initialized properly.
408 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
409 current_task -> td_flags.executing = 0;
410
411 // Add task to stack if tied
412#ifdef BUILD_TIED_TASK_STACK
413 if ( taskdata -> td_flags.tiedness == TASK_TIED )
414 {
415 __kmp_push_task_stack( gtid, thread, taskdata );
416 }
417#endif /* BUILD_TIED_TASK_STACK */
418
419 // mark starting task as executing and as current task
420 thread -> th.th_current_task = taskdata;
421
422 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
423 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
424 taskdata -> td_flags.started = 1;
425 taskdata -> td_flags.executing = 1;
426 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
427 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
428
429 // GEH TODO: shouldn't we pass some sort of location identifier here?
430 // APT: yes, we will pass location here.
431 // need to store current thread state (in a thread or taskdata structure)
432 // before setting work_state, otherwise wrong state is set after end of task
433
434 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
435 gtid, taskdata ) );
436
437 return;
438}
439
440
441//----------------------------------------------------------------------
442// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
443// loc_ref: source location information; points to beginning of task block.
444// gtid: global thread number.
445// task: task thunk for the started task.
446
447void
448__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
449{
450 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
451 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
452
453 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
454 gtid, loc_ref, taskdata, current_task ) );
455
456 taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
457 __kmp_task_start( gtid, task, current_task );
458
459 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
460 gtid, loc_ref, taskdata ) );
461
462 return;
463}
464
465#ifdef TASK_UNUSED
466//----------------------------------------------------------------------
467// __kmpc_omp_task_begin: report that a given task has started execution
468// NEVER GENERATED BY COMPILER, DEPRECATED!!!
469
470void
471__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
472{
473 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
474
475 KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
476 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
477
478 __kmp_task_start( gtid, task, current_task );
479
480 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
481 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
482
483 return;
484}
485#endif // TASK_UNUSED
486
487
488//-------------------------------------------------------------------------------------
489// __kmp_free_task: free the current task space and the space for shareds
490// gtid: Global thread ID of calling thread
491// taskdata: task to free
492// thread: thread data structure of caller
493
494static void
495__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
496{
497 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
498 gtid, taskdata) );
499
500 // Check to make sure all flags and counters have the correct values
501 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
502 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
503 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
504 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
505 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
506 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
507
508 taskdata->td_flags.freed = 1;
509 // deallocate the taskdata and shared variable blocks associated with this task
510 #if USE_FAST_MEMORY
511 __kmp_fast_free( thread, taskdata );
512 #else /* ! USE_FAST_MEMORY */
513 __kmp_thread_free( thread, taskdata );
514 #endif
515
516 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
517 gtid, taskdata) );
518}
519
520//-------------------------------------------------------------------------------------
521// __kmp_free_task_and_ancestors: free the current task and ancestors without children
522//
523// gtid: Global thread ID of calling thread
524// taskdata: task to free
525// thread: thread data structure of caller
526
527static void
528__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
529{
530 kmp_int32 children = 0;
531 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
532
533 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
534
535 if ( !team_or_tasking_serialized ) {
536 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
537 KMP_DEBUG_ASSERT( children >= 0 );
538 }
539
540 // Now, go up the ancestor tree to see if any ancestors can now be freed.
541 while ( children == 0 )
542 {
543 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
544
545 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
546 "and freeing itself\n", gtid, taskdata) );
547
548 // --- Deallocate my ancestor task ---
549 __kmp_free_task( gtid, taskdata, thread );
550
551 taskdata = parent_taskdata;
552
553 // Stop checking ancestors at implicit task or if tasking serialized
554 // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
555 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
556 return;
557
558 if ( !team_or_tasking_serialized ) {
559 // Predecrement simulated by "- 1" calculation
560 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
561 KMP_DEBUG_ASSERT( children >= 0 );
562 }
563 }
564
565 KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
566 "not freeing it yet\n", gtid, taskdata, children) );
567}
568
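// Worked example (not compiled into the runtime) of the td_allocated_child_tasks protocol
// used above, assuming a team-parallel, non-serialized run with an explicit parent task:
// the count starts at 1 for the task itself (see __kmp_task_alloc), each explicit child
// adds 1, and the count drops by 1 when the task itself finishes and by 1 for each child
// that is freed.  The taskdata storage is released only when the count reaches 0, so a
// parent outlives children that still hold a reference to it.
#if 0
static void
__example_child_count( kmp_taskdata_t * parent )
{
    // parent allocated:                          parent->td_allocated_child_tasks == 1
    // two explicit children allocated:           count == 3
    // both children finish and are freed:        count == 1
    // parent finishes (self reference dropped):  count == 0  -> parent can be freed
    (void) parent;
}
#endif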
569//---------------------------------------------------------------------
570// __kmp_task_finish: bookkeeping to do when a task finishes execution
571// gtid: global thread ID for calling thread
572// task: task to be finished
573// resumed_task: task to be resumed. (may be NULL if task is serialized)
574
575static void
576__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
577{
578 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
579 kmp_info_t * thread = __kmp_threads[ gtid ];
580 kmp_int32 children = 0;
581
582 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
583 gtid, taskdata, resumed_task) );
584
585 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
586
587 // Pop task from stack if tied
588#ifdef BUILD_TIED_TASK_STACK
589 if ( taskdata -> td_flags.tiedness == TASK_TIED )
590 {
591 __kmp_pop_task_stack( gtid, thread, taskdata );
592 }
593#endif /* BUILD_TIED_TASK_STACK */
594
Jim Cownie5e8470a2013-09-27 10:38:44 +0000595 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000596 taskdata -> td_flags.complete = 1; // mark the task as completed
597 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
598 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
599
600 // Only need to keep track of count if team parallel and tasking not serialized
601 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
602 // Predecrement simulated by "- 1" calculation
603 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
604 KMP_DEBUG_ASSERT( children >= 0 );
605#if OMP_40_ENABLED
606 if ( taskdata->td_taskgroup )
607 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
Jim Cownie181b4bb2013-12-23 17:28:57 +0000608 __kmp_release_deps(gtid,taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000609#endif
610 }
611
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000612 // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
 613 // Otherwise, if a task is executed immediately from the release_deps code
614 // the flag will be reset to 1 again by this same function
615 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
616 taskdata -> td_flags.executing = 0; // suspend the finishing task
617
Jim Cownie5e8470a2013-09-27 10:38:44 +0000618 KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
619 gtid, taskdata, children) );
620
Jim Cownie181b4bb2013-12-23 17:28:57 +0000621#if OMP_40_ENABLED
 622 /* If the task's destructor thunk flag has been set, we need to invoke the
623 destructor thunk that has been generated by the compiler.
624 The code is placed here, since at this point other tasks might have been released
 625 hence overlapping the destructor invocations with some other work in the
626 released tasks. The OpenMP spec is not specific on when the destructors are
627 invoked, so we should be free to choose.
628 */
629 if (taskdata->td_flags.destructors_thunk) {
630 kmp_routine_entry_t destr_thunk = task->destructors;
631 KMP_ASSERT(destr_thunk);
632 destr_thunk(gtid, task);
633 }
634#endif // OMP_40_ENABLED
635
Jim Cownie5e8470a2013-09-27 10:38:44 +0000636 // bookkeeping for resuming task:
637 // GEH - note tasking_ser => task_serial
638 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
639 taskdata->td_flags.task_serial);
640 if ( taskdata->td_flags.task_serial )
641 {
642 if (resumed_task == NULL) {
643 resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
644 }
645 else {
646 // verify resumed task passed in points to parent
647 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
648 }
649 }
650 else {
 651 KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
652 }
653
654 // Free this task and then ancestor tasks if they have no children.
655 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
656
657 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
658
659 // TODO: GEH - make sure root team implicit task is initialized properly.
660 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
661 resumed_task->td_flags.executing = 1; // resume previous task
662
663 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
664 gtid, taskdata, resumed_task) );
665
666 return;
667}
668
669//---------------------------------------------------------------------
670// __kmpc_omp_task_complete_if0: report that a task has completed execution
671// loc_ref: source location information; points to end of task block.
672// gtid: global thread number.
673// task: task thunk for the completed task.
674
675void
676__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
677{
678 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
679 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
680
681 __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
682
683 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
684 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
685
686 return;
687}
688
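// Sketch (excluded from compilation) of how a compiler can drive the two entry points
// above for an undeferred task, e.g. one generated under "if(0)": the task is allocated,
// reported as started, its routine invoked directly on the encountering thread, and then
// reported as complete.  The __example_* name is hypothetical.
#if 0
static void
__example_undeferred_task( ident_t * loc, kmp_int32 gtid, kmp_task_t * task )
{
    __kmpc_omp_task_begin_if0( loc, gtid, task );    // marks the task serial and starts it
    (*(task->routine))( gtid, task );                // run the outlined body in place
    __kmpc_omp_task_complete_if0( loc, gtid, task ); // finish bookkeeping, resume the parent
}
#endif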
689#ifdef TASK_UNUSED
690//---------------------------------------------------------------------
691// __kmpc_omp_task_complete: report that a task has completed execution
692// NEVER GENERATED BY COMPILER, DEPRECATED!!!
693
694void
695__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
696{
697 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
698 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
699
700 __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
701
702 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
703 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
704 return;
705}
706#endif // TASK_UNUSED
707
708
Andrey Churbanove5f44922015-04-29 16:22:07 +0000709#if OMPT_SUPPORT
710//----------------------------------------------------------------------------------------------------
711// __kmp_task_init_ompt:
712// Initialize OMPT fields maintained by a task. Since the serial task is initialized before
713// ompt_initialize is called, at the point the serial task is initialized we don't know whether
714// OMPT will be used or not when the serial task is initialized. This function provides the support
715// needed to initialize OMPT for the serial task after the fact.
716
717void
718__kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
719{
720 task->ompt_task_info.task_id = __ompt_task_id_new(tid);
721 task->ompt_task_info.function = NULL;
722 task->ompt_task_info.frame = (ompt_frame_t) {
723 .exit_runtime_frame = NULL,
724 .reenter_runtime_frame = NULL
725 };
726}
727#endif
728
729
Jim Cownie5e8470a2013-09-27 10:38:44 +0000730//----------------------------------------------------------------------------------------------------
731// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
732//
733// loc_ref: reference to source location of parallel region
734// this_thr: thread data structure corresponding to implicit task
735// team: team for this_thr
736// tid: thread id of given thread within team
737// set_curr_task: TRUE if need to push current task to thread
 738// NOTE: Routine does not set up the implicit task ICVs. This is assumed to have already been done elsewhere.
739// TODO: Get better loc_ref. Value passed in may be NULL
740
741void
742__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
743{
744 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
745
746 KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
747 tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
748
749 task->td_task_id = KMP_GEN_TASK_ID();
750 task->td_team = team;
751// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
752 task->td_ident = loc_ref;
753 task->td_taskwait_ident = NULL;
754 task->td_taskwait_counter = 0;
755 task->td_taskwait_thread = 0;
756
757 task->td_flags.tiedness = TASK_TIED;
758 task->td_flags.tasktype = TASK_IMPLICIT;
759 // All implicit tasks are executed immediately, not deferred
760 task->td_flags.task_serial = 1;
761 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
762 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
763
764 task->td_flags.started = 1;
765 task->td_flags.executing = 1;
766 task->td_flags.complete = 0;
767 task->td_flags.freed = 0;
768
Jim Cownie181b4bb2013-12-23 17:28:57 +0000769#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +0000770 task->td_dephash = NULL;
771 task->td_depnode = NULL;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000772#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000773
774 if (set_curr_task) { // only do this initialization the first time a thread is created
775 task->td_incomplete_child_tasks = 0;
776 task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
777#if OMP_40_ENABLED
778 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
779#endif
780 __kmp_push_current_task_to_thread( this_thr, team, tid );
781 } else {
782 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
783 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
784 }
785
786 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
787 tid, team, task ) );
788}
789
790// Round up a size to a power of two specified by val
791// Used to insert padding between structures co-allocated using a single malloc() call
792static size_t
793__kmp_round_up_to_val( size_t size, size_t val ) {
794 if ( size & ( val - 1 ) ) {
795 size &= ~ ( val - 1 );
796 if ( size <= KMP_SIZE_T_MAX - val ) {
797 size += val; // Round up if there is no overflow.
798 }; // if
799 }; // if
800 return size;
 801} // __kmp_round_up_to_val
802
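// Worked example (not compiled) of the rounding above: with val == sizeof(void *) == 8
// on a 64-bit target, a combined size of 13 bytes is padded up to 16, while an already
// aligned size of 16 is returned unchanged.  This is how __kmp_task_alloc below aligns
// the shareds block that follows the kmp_task_t structure.
#if 0
static void
__example_round_up( void )
{
    size_t a = __kmp_round_up_to_val( 13, sizeof( void * ) );  // 13 -> 16
    size_t b = __kmp_round_up_to_val( 16, sizeof( void * ) );  // 16 -> 16
    (void) a; (void) b;
}
#endif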
803
804//---------------------------------------------------------------------------------
805// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
806//
807// loc_ref: source location information
808// gtid: global thread number.
809// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
810// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
811// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
812// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
813// task_entry: Pointer to task code entry point generated by compiler.
814// returns: a pointer to the allocated kmp_task_t structure (task).
815
816kmp_task_t *
817__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
818 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
819 kmp_routine_entry_t task_entry )
820{
821 kmp_task_t *task;
822 kmp_taskdata_t *taskdata;
823 kmp_info_t *thread = __kmp_threads[ gtid ];
824 kmp_team_t *team = thread->th.th_team;
825 kmp_taskdata_t *parent_task = thread->th.th_current_task;
826 size_t shareds_offset;
827
828 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
829 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
830 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
831 sizeof_shareds, task_entry) );
832
833 if ( parent_task->td_flags.final ) {
834 if (flags->merged_if0) {
835 }
836 flags->final = 1;
837 }
838
839 // Calculate shared structure offset including padding after kmp_task_t struct
840 // to align pointers in shared struct
841 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
842 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
843
844 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
845 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
846 gtid, shareds_offset) );
847 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
848 gtid, sizeof_shareds) );
849
850 // Avoid double allocation here by combining shareds with taskdata
851 #if USE_FAST_MEMORY
852 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
853 #else /* ! USE_FAST_MEMORY */
854 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
855 #endif /* USE_FAST_MEMORY */
856
857 task = KMP_TASKDATA_TO_TASK(taskdata);
858
859 // Make sure task & taskdata are aligned appropriately
Andrey Churbanovd1c55042015-01-19 18:29:35 +0000860#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
Jim Cownie5e8470a2013-09-27 10:38:44 +0000861 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
862 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
863#else
864 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
865 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
866#endif
867 if (sizeof_shareds > 0) {
868 // Avoid double allocation here by combining shareds with taskdata
869 task->shareds = & ((char *) taskdata)[ shareds_offset ];
870 // Make sure shareds struct is aligned to pointer size
871 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
872 } else {
873 task->shareds = NULL;
874 }
875 task->routine = task_entry;
876 task->part_id = 0; // AC: Always start with 0 part id
877
878 taskdata->td_task_id = KMP_GEN_TASK_ID();
879 taskdata->td_team = team;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000880 taskdata->td_alloc_thread = thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000881 taskdata->td_parent = parent_task;
882 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
883 taskdata->td_ident = loc_ref;
884 taskdata->td_taskwait_ident = NULL;
885 taskdata->td_taskwait_counter = 0;
886 taskdata->td_taskwait_thread = 0;
887 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
888 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
889
890 taskdata->td_flags.tiedness = flags->tiedness;
891 taskdata->td_flags.final = flags->final;
892 taskdata->td_flags.merged_if0 = flags->merged_if0;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000893#if OMP_40_ENABLED
894 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
895#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +0000896 taskdata->td_flags.tasktype = TASK_EXPLICIT;
897
898 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
899 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
900
901 // GEH - TODO: fix this to copy parent task's value of team_serial flag
902 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
903
904 // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
905 // tasks are not left until program termination to execute. Also, it helps locality to execute
906 // immediately.
907 taskdata->td_flags.task_serial = ( taskdata->td_flags.final
908 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
909
910 taskdata->td_flags.started = 0;
911 taskdata->td_flags.executing = 0;
912 taskdata->td_flags.complete = 0;
913 taskdata->td_flags.freed = 0;
914
915 taskdata->td_flags.native = flags->native;
916
917 taskdata->td_incomplete_child_tasks = 0;
918 taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
919#if OMP_40_ENABLED
920 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
921 taskdata->td_dephash = NULL;
922 taskdata->td_depnode = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000923#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000924 // Only need to keep track of child task counts if team parallel and tasking not serialized
925 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
926 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
927#if OMP_40_ENABLED
928 if ( parent_task->td_taskgroup )
929 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
930#endif
931 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
932 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
933 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
934 }
935 }
936
937 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
938 gtid, taskdata, taskdata->td_parent) );
939
940 return task;
941}
942
943
944kmp_task_t *
945__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
946 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
947 kmp_routine_entry_t task_entry )
948{
949 kmp_task_t *retval;
950 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
951
952 input_flags->native = FALSE;
953 // __kmp_task_alloc() sets up all other runtime flags
954
955 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
956 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
957 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
958 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
959
960 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
961 sizeof_shareds, task_entry );
962
963 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
964
965 return retval;
966}
967
968//-----------------------------------------------------------
969// __kmp_invoke_task: invoke the specified task
970//
971// gtid: global thread ID of caller
972// task: the task to invoke
 973// current_task: the task to resume after task invocation
974
975static void
976__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
977{
978 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jim Cownie181b4bb2013-12-23 17:28:57 +0000979#if OMP_40_ENABLED
980 int discard = 0 /* false */;
981#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000982 KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
983 gtid, taskdata, current_task) );
984
985 __kmp_task_start( gtid, task, current_task );
986
Jim Cownie181b4bb2013-12-23 17:28:57 +0000987#if OMP_40_ENABLED
988 // TODO: cancel tasks if the parallel region has also been cancelled
989 // TODO: check if this sequence can be hoisted above __kmp_task_start
990 // if cancellation has been enabled for this run ...
991 if (__kmp_omp_cancellation) {
992 kmp_info_t *this_thr = __kmp_threads [ gtid ];
993 kmp_team_t * this_team = this_thr->th.th_team;
994 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
995 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
996 // this task belongs to a task group and we need to cancel it
997 discard = 1 /* true */;
998 }
999 }
1000
Jim Cownie5e8470a2013-09-27 10:38:44 +00001001 //
1002 // Invoke the task routine and pass in relevant data.
1003 // Thunks generated by gcc take a different argument list.
1004 //
Jim Cownie181b4bb2013-12-23 17:28:57 +00001005 if (!discard) {
1006#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001007#ifdef KMP_GOMP_COMPAT
Jim Cownie181b4bb2013-12-23 17:28:57 +00001008 if (taskdata->td_flags.native) {
1009 ((void (*)(void *))(*(task->routine)))(task->shareds);
1010 }
1011 else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001012#endif /* KMP_GOMP_COMPAT */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001013 {
1014 (*(task->routine))(gtid, task);
1015 }
1016#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001017 }
Jim Cownie181b4bb2013-12-23 17:28:57 +00001018#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001019
1020 __kmp_task_finish( gtid, task, current_task );
1021
 1022 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1023 gtid, taskdata, current_task) );
1024 return;
1025}
1026
1027//-----------------------------------------------------------------------
1028// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1029//
1030// loc_ref: location of original task pragma (ignored)
1031// gtid: Global Thread ID of encountering thread
1032// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1033// Returns:
1034// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1035// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1036
1037kmp_int32
1038__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1039{
1040 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1041
1042 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1043 gtid, loc_ref, new_taskdata ) );
1044
1045 /* Should we execute the new task or queue it? For now, let's just always try to
1046 queue it. If the queue fills up, then we'll execute it. */
1047
1048 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1049 { // Execute this task immediately
1050 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1051 new_taskdata->td_flags.task_serial = 1;
1052 __kmp_invoke_task( gtid, new_task, current_task );
1053 }
1054
1055 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1056 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1057 new_taskdata ) );
1058
1059 return TASK_CURRENT_NOT_QUEUED;
1060}
1061
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001062//---------------------------------------------------------------------
1063// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1064// gtid: Global Thread ID of encountering thread
1065// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1066// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1067// returns:
1068//
1069// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1070// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1071kmp_int32
1072__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1073{
1074 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1075
1076 /* Should we execute the new task or queue it? For now, let's just always try to
1077 queue it. If the queue fills up, then we'll execute it. */
1078
1079 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1080 { // Execute this task immediately
1081 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1082 if ( serialize_immediate )
1083 new_taskdata -> td_flags.task_serial = 1;
1084 __kmp_invoke_task( gtid, new_task, current_task );
1085 }
1086
1087
1088 return TASK_CURRENT_NOT_QUEUED;
1089}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001090
1091//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001092// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1093// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001094// loc_ref: location of original task pragma (ignored)
1095// gtid: Global Thread ID of encountering thread
1096// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1097// returns:
1098//
1099// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1100// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1101
1102kmp_int32
1103__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1104{
1105 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001106 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001107
1108 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1109 gtid, loc_ref, new_taskdata ) );
1110
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001111 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001112
1113 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1114 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001115 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001116}
1117
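// Sketch (excluded from compilation) of the calling pattern a compiler front end emits
// for "#pragma omp task": allocate the task thunk with __kmpc_omp_task_alloc, copy any
// firstprivate data into the private/shareds area, then hand it to __kmpc_omp_task,
// which queues it or runs it immediately if the deque is full.  The tiedness bit is
// assumed here to be the low bit of the kmp_int32 flags word (it is read back as
// input_flags->tiedness above); the __example_* names are hypothetical.
#if 0
static kmp_int32
__example_task_entry( kmp_int32 gtid, void * task )
{
    // the outlined task body would go here; "task" is the kmp_task_t thunk
    return 0;
}

static void
__example_spawn( ident_t * loc, kmp_int32 gtid )
{
    kmp_task_t * task = __kmpc_omp_task_alloc( loc, gtid, 1 /* tied (assumed bit 0) */,
                                               sizeof( kmp_task_t ), 0 /* no shareds */,
                                               (kmp_routine_entry_t) &__example_task_entry );
    // firstprivate copy-in would happen here
    __kmpc_omp_task( loc, gtid, task );
}
#endif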
Jim Cownie5e8470a2013-09-27 10:38:44 +00001118//-------------------------------------------------------------------------------------
1119// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1120
1121kmp_int32
1122__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1123{
1124 kmp_taskdata_t * taskdata;
1125 kmp_info_t * thread;
1126 int thread_finished = FALSE;
1127
1128 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1129 gtid, loc_ref) );
1130
1131 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1132 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1133
1134 thread = __kmp_threads[ gtid ];
1135 taskdata = thread -> th.th_current_task;
1136#if USE_ITT_BUILD
1137 // Note: These values are used by ITT events as well.
1138#endif /* USE_ITT_BUILD */
1139 taskdata->td_taskwait_counter += 1;
1140 taskdata->td_taskwait_ident = loc_ref;
1141 taskdata->td_taskwait_thread = gtid + 1;
1142
1143#if USE_ITT_BUILD
1144 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1145 if ( itt_sync_obj != NULL )
1146 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1147#endif /* USE_ITT_BUILD */
1148
1149 if ( ! taskdata->td_flags.team_serial ) {
1150 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001151 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001152 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001153 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1154 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001155 }
1156 }
1157#if USE_ITT_BUILD
1158 if ( itt_sync_obj != NULL )
1159 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1160#endif /* USE_ITT_BUILD */
1161
1162 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1163 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1164 }
1165
1166 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1167 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1168
1169 return TASK_CURRENT_NOT_QUEUED;
1170}
1171
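// Sketch (excluded from compilation): "#pragma omp taskwait" typically lowers to a single
// call to the entry point above, which services queued tasks through flag.execute_tasks()
// until the current task's td_incomplete_child_tasks count drains to zero.
// __example_taskwait is a hypothetical name.
#if 0
static void
__example_taskwait( ident_t * loc, kmp_int32 gtid )
{
    __kmpc_omp_taskwait( loc, gtid );   // returns TASK_CURRENT_NOT_QUEUED
}
#endif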
1172
1173//-------------------------------------------------
1174// __kmpc_omp_taskyield: switch to a different task
1175
1176kmp_int32
1177__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1178{
1179 kmp_taskdata_t * taskdata;
1180 kmp_info_t * thread;
1181 int thread_finished = FALSE;
1182
1183 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1184 gtid, loc_ref, end_part) );
1185
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001186 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001187 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1188
1189 thread = __kmp_threads[ gtid ];
1190 taskdata = thread -> th.th_current_task;
1191 // Should we model this as a task wait or not?
1192#if USE_ITT_BUILD
1193 // Note: These values are used by ITT events as well.
1194#endif /* USE_ITT_BUILD */
1195 taskdata->td_taskwait_counter += 1;
1196 taskdata->td_taskwait_ident = loc_ref;
1197 taskdata->td_taskwait_thread = gtid + 1;
1198
1199#if USE_ITT_BUILD
1200 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1201 if ( itt_sync_obj != NULL )
1202 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1203#endif /* USE_ITT_BUILD */
1204 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001205 kmp_task_team_t * task_team = thread->th.th_task_team;
1206 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001207 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001208 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1209 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1210 }
1211 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001212 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001213#if USE_ITT_BUILD
1214 if ( itt_sync_obj != NULL )
1215 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1216#endif /* USE_ITT_BUILD */
1217
1218 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1219 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1220 }
1221
1222 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1223 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1224
1225 return TASK_CURRENT_NOT_QUEUED;
1226}
1227
1228
1229#if OMP_40_ENABLED
1230//-------------------------------------------------------------------------------------
1231// __kmpc_taskgroup: Start a new taskgroup
1232
1233void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001234__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001235{
1236 kmp_info_t * thread = __kmp_threads[ gtid ];
1237 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1238 kmp_taskgroup_t * tg_new =
1239 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1240 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1241 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001242 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001243 tg_new->parent = taskdata->td_taskgroup;
1244 taskdata->td_taskgroup = tg_new;
1245}
1246
1247
1248//-------------------------------------------------------------------------------------
1249// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1250// and its descendants are complete
1251
1252void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001253__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001254{
1255 kmp_info_t * thread = __kmp_threads[ gtid ];
1256 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1257 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1258 int thread_finished = FALSE;
1259
1260 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1261 KMP_DEBUG_ASSERT( taskgroup != NULL );
1262
1263 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1264#if USE_ITT_BUILD
1265 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1266 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1267 if ( itt_sync_obj != NULL )
1268 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1269#endif /* USE_ITT_BUILD */
1270
1271 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001272 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001273 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001274 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1275 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001276 }
1277 }
1278
1279#if USE_ITT_BUILD
1280 if ( itt_sync_obj != NULL )
1281 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1282#endif /* USE_ITT_BUILD */
1283 }
1284 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1285
1286 // Restore parent taskgroup for the current task
1287 taskdata->td_taskgroup = taskgroup->parent;
1288 __kmp_thread_free( thread, taskgroup );
1289
1290 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1291}
1292#endif
1293
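// Sketch (excluded from compilation) of how the two entry points above bracket a
// "#pragma omp taskgroup" region: __kmpc_taskgroup pushes a fresh kmp_taskgroup_t whose
// count is incremented for every task created inside the region (see __kmp_task_alloc),
// and __kmpc_end_taskgroup waits for that count to drain before popping it.
// __example_taskgroup and the pre-allocated "child" task are hypothetical.
#if 0
static void
__example_taskgroup( ident_t * loc, kmp_int32 gtid, kmp_task_t * child )
{
    __kmpc_taskgroup( loc, gtid );        // push a new taskgroup
    __kmpc_omp_task( loc, gtid, child );  // tasks created here join taskgroup->count
    __kmpc_end_taskgroup( loc, gtid );    // wait until taskgroup->count == 0, then pop
}
#endif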
1294
1295//------------------------------------------------------
1296// __kmp_remove_my_task: remove a task from my own deque
1297
1298static kmp_task_t *
1299__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1300 kmp_int32 is_constrained )
1301{
1302 kmp_task_t * task;
1303 kmp_taskdata_t * taskdata;
1304 kmp_thread_data_t *thread_data;
1305 kmp_uint32 tail;
1306
1307 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1308 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1309
1310 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1311
1312 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1313 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1314 thread_data->td.td_deque_tail) );
1315
1316 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1317 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1318 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1319 thread_data->td.td_deque_tail) );
1320 return NULL;
1321 }
1322
1323 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1324
1325 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1326 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1327 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1328 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1329 thread_data->td.td_deque_tail) );
1330 return NULL;
1331 }
1332
1333 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1334 taskdata = thread_data -> td.td_deque[ tail ];
1335
1336 if (is_constrained) {
1337        // we need to check if the candidate obeys the task scheduling constraint:
1338        // only a child of the current task can be scheduled
1339 kmp_taskdata_t * current = thread->th.th_current_task;
1340 kmp_int32 level = current->td_level;
1341 kmp_taskdata_t * parent = taskdata->td_parent;
1342 while ( parent != current && parent->td_level > level ) {
1343 parent = parent->td_parent; // check generation up to the level of the current task
1344 KMP_DEBUG_ASSERT(parent != NULL);
1345 }
1346 if ( parent != current ) {
1347            // If the tail task is not a child, then no other children can appear in the deque.
1348 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1349 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1350 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1351 thread_data->td.td_deque_tail) );
1352 return NULL;
1353 }
1354 }
1355
1356 thread_data -> td.td_deque_tail = tail;
1357 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1358
1359 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1360
1361    KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1362 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1363 thread_data->td.td_deque_tail) );
1364
1365 task = KMP_TASKDATA_TO_TASK( taskdata );
1366 return task;
1367}
1368
1369
1370//-----------------------------------------------------------
1371// __kmp_steal_task: remove a task from another thread's deque
1372// Assume that calling thread has already checked existence of
1373// task_team thread_data before calling this routine.
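// Unconstrained steals take the oldest task from the head of the victim's deque;
// constrained steals take from the tail and only succeed when the candidate is a
// descendant of the calling thread's current task (task scheduling constraint).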
1374
1375static kmp_task_t *
1376__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1377 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1378 kmp_int32 is_constrained )
1379{
1380 kmp_task_t * task;
1381 kmp_taskdata_t * taskdata;
1382 kmp_thread_data_t *victim_td, *threads_data;
1383 kmp_int32 victim_tid, thread_tid;
1384
1385 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1386
1387 threads_data = task_team -> tt.tt_threads_data;
1388 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1389
1390 victim_tid = victim->th.th_info.ds.ds_tid;
1391 victim_td = & threads_data[ victim_tid ];
1392
1393 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1394 "head=%u tail=%u\n",
1395 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1396 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1397
1398 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1399 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1400 {
1401 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1402 "ntasks=%d head=%u tail=%u\n",
1403 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1404 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1405 return NULL;
1406 }
1407
1408 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1409
1410 // Check again after we acquire the lock
1411 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1412 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1413 {
1414 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1415 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1416 "ntasks=%d head=%u tail=%u\n",
1417 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1418 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1419 return NULL;
1420 }
1421
1422 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1423
1424 if ( !is_constrained ) {
1425 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1426 // Bump head pointer and Wrap.
1427 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1428 } else {
1429        // While we have postponed tasks, steal from the tail of the deque (smaller tasks)
1430 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1431 taskdata = victim_td -> td.td_deque[ tail ];
1432        // we need to check if the candidate obeys the task scheduling constraint:
1433        // only a child of the current task can be scheduled
1434 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1435 kmp_int32 level = current->td_level;
1436 kmp_taskdata_t * parent = taskdata->td_parent;
1437 while ( parent != current && parent->td_level > level ) {
1438 parent = parent->td_parent; // check generation up to the level of the current task
1439 KMP_DEBUG_ASSERT(parent != NULL);
1440 }
1441 if ( parent != current ) {
1442            // If the tail task is not a child, then no other children can appear in the deque (?).
1443 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1444 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1445 "ntasks=%d head=%u tail=%u\n",
1446 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1447 task_team, victim_td->td.td_deque_ntasks,
1448 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1449 return NULL;
1450 }
1451 victim_td -> td.td_deque_tail = tail;
1452 }
1453 if (*thread_finished) {
1454 // We need to un-mark this victim as a finished victim. This must be done before
1455 // releasing the lock, or else other threads (starting with the master victim)
1456 // might be prematurely released from the barrier!!!
1457 kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1458
1459 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1460 gtid, count + 1, task_team) );
1461
1462 *thread_finished = FALSE;
1463 }
1464 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1465
1466 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1467
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001468 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001469 "ntasks=%d head=%u tail=%u\n",
1470 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1471 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1472 victim_td->td.td_deque_tail) );
1473
1474 task = KMP_TASKDATA_TO_TASK( taskdata );
1475 return task;
1476}
1477
1478
1479//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001480// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001481// is satisfied (return true) or there are none left (return false).
1482// final_spin is TRUE if this is the spin at the release barrier.
1483// thread_finished indicates whether the thread is finished executing all
1484// the tasks it has on its deque, and is at the release barrier.
1485// spinner is the location on which to spin.
1486// spinner == NULL means only execute a single task and return.
1487// checker is the value to check to terminate the spin.
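// Search order (as implemented below): first drain this thread's own deque, then
// retry the victim it last stole from successfully, then pick random victims,
// re-checking the termination condition after each phase when final_spin is set.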
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001488template <class C>
1489static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1490 int *thread_finished
1491 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492{
1493 kmp_task_team_t * task_team;
1494 kmp_team_t * team;
1495 kmp_thread_data_t * threads_data;
1496 kmp_task_t * task;
1497 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1498 volatile kmp_uint32 * unfinished_threads;
1499 kmp_int32 nthreads, last_stolen, k, tid;
1500
1501 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1502 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1503
1504 task_team = thread -> th.th_task_team;
1505 KMP_DEBUG_ASSERT( task_team != NULL );
1506
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001507 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001508 gtid, final_spin, *thread_finished) );
1509
1510 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1511 KMP_DEBUG_ASSERT( threads_data != NULL );
1512
1513 nthreads = task_team -> tt.tt_nproc;
1514 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1515 KMP_DEBUG_ASSERT( nthreads > 1 );
1516 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1517
1518 // Choose tasks from our own work queue.
1519 start:
1520 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1521#if USE_ITT_BUILD && USE_ITT_NOTIFY
1522 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1523 if ( itt_sync_obj == NULL ) {
1524 // we are at fork barrier where we could not get the object reliably
1525 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1526 }
1527 __kmp_itt_task_starting( itt_sync_obj );
1528 }
1529#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1530 __kmp_invoke_task( gtid, task, current_task );
1531#if USE_ITT_BUILD
1532 if ( itt_sync_obj != NULL )
1533 __kmp_itt_task_finished( itt_sync_obj );
1534#endif /* USE_ITT_BUILD */
1535
1536 // If this thread is only partway through the barrier and the condition
1537 // is met, then return now, so that the barrier gather/release pattern can proceed.
1538 // If this thread is in the last spin loop in the barrier, waiting to be
1539        // released, we know that the termination condition will not be satisfied,
1540 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001541 if (flag == NULL || (!final_spin && flag->done_check())) {
1542 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001543 return TRUE;
1544 }
1545 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1546 }
1547
1548 // This thread's work queue is empty. If we are in the final spin loop
1549 // of the barrier, check and see if the termination condition is satisfied.
1550 if (final_spin) {
1551 // First, decrement the #unfinished threads, if that has not already
1552 // been done. This decrement might be to the spin location, and
1553 // result in the termination condition being satisfied.
1554 if (! *thread_finished) {
1555 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001556 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001557 gtid, count, task_team) );
1558 *thread_finished = TRUE;
1559 }
1560
1561 // It is now unsafe to reference thread->th.th_team !!!
1562 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1563 // thread to pass through the barrier, where it might reset each thread's
1564 // th.th_team field for the next parallel region.
1565 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001566 if (flag != NULL && flag->done_check()) {
1567 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001568 return TRUE;
1569 }
1570 }
1571
1572 // Try to steal from the last place I stole from successfully.
1573 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1574 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1575
1576 if (last_stolen != -1) {
1577 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1578
1579 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1580 thread_finished, is_constrained )) != NULL)
1581 {
1582#if USE_ITT_BUILD && USE_ITT_NOTIFY
1583 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1584 if ( itt_sync_obj == NULL ) {
1585 // we are at fork barrier where we could not get the object reliably
1586 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1587 }
1588 __kmp_itt_task_starting( itt_sync_obj );
1589 }
1590#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1591 __kmp_invoke_task( gtid, task, current_task );
1592#if USE_ITT_BUILD
1593 if ( itt_sync_obj != NULL )
1594 __kmp_itt_task_finished( itt_sync_obj );
1595#endif /* USE_ITT_BUILD */
1596
1597 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001598 if (flag == NULL || (!final_spin && flag->done_check())) {
1599 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001600 gtid) );
1601 return TRUE;
1602 }
1603
1604 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1605 // If the execution of the stolen task resulted in more tasks being
1606 // placed on our run queue, then restart the whole process.
1607 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001608 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001609 gtid) );
1610 goto start;
1611 }
1612 }
1613
1614 // Don't give priority to stealing from this thread anymore.
1615 threads_data[ tid ].td.td_deque_last_stolen = -1;
1616
1617        // The victim's work queue is empty. If we are in the final spin loop
1618 // of the barrier, check and see if the termination condition is satisfied.
1619 if (final_spin) {
1620 // First, decrement the #unfinished threads, if that has not already
1621 // been done. This decrement might be to the spin location, and
1622 // result in the termination condition being satisfied.
1623 if (! *thread_finished) {
1624 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001625 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001626 "task_team=%p\n", gtid, count, task_team) );
1627 *thread_finished = TRUE;
1628 }
1629
1630 // If __kmp_tasking_mode != tskm_immediate_exec
1631 // then it is now unsafe to reference thread->th.th_team !!!
1632 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1633 // thread to pass through the barrier, where it might reset each thread's
1634 // th.th_team field for the next parallel region.
1635 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001636 if (flag != NULL && flag->done_check()) {
1637 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638 gtid) );
1639 return TRUE;
1640 }
1641 }
1642 }
1643
1644 // Find a different thread to steal work from. Pick a random thread.
1645 // My initial plan was to cycle through all the threads, and only return
1646 // if we tried to steal from every thread, and failed. Arch says that's
1647 // not such a great idea.
1648 // GEH - need yield code in this loop for throughput library mode?
1649 new_victim:
1650 k = __kmp_get_random( thread ) % (nthreads - 1);
1651 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1652 ++k; // Adjusts random distribution to exclude self
1653 }
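    // e.g., with nthreads == 4 and ds_tid == 2, k in {0,1,2} maps to victims {0,1,3},
    // giving a uniform choice over the other three threads.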
1654 {
1655 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1656 int first;
1657
1658 // There is a slight chance that __kmp_enable_tasking() did not wake up
1659        // all threads waiting at the barrier. If the victim thread is sleeping,
1660        // then wake it up. Since we were going to pay the cache miss penalty
1661        // for referencing another thread's kmp_info_t struct anyway, the check
1662 // shouldn't cost too much performance at this point.
1663 // In extra barrier mode, tasks do not sleep at the separate tasking
1664 // barrier, so this isn't a problem.
1665 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1666 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1667 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1668 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001669 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670            // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001671 // There is a slight possibility that it resumes, steals a task from
Jim Cownie5e8470a2013-09-27 10:38:44 +00001672            // another thread, which spawns more tasks, all in the time that it takes
1673 // this thread to check => don't write an assertion that the victim's
1674 // queue is empty. Try stealing from a different thread.
1675 goto new_victim;
1676 }
1677
1678 // Now try to steal work from the selected thread
1679 first = TRUE;
1680 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1681 thread_finished, is_constrained )) != NULL)
1682 {
1683#if USE_ITT_BUILD && USE_ITT_NOTIFY
1684 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1685 if ( itt_sync_obj == NULL ) {
1686 // we are at fork barrier where we could not get the object reliably
1687 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1688 }
1689 __kmp_itt_task_starting( itt_sync_obj );
1690 }
1691#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1692 __kmp_invoke_task( gtid, task, current_task );
1693#if USE_ITT_BUILD
1694 if ( itt_sync_obj != NULL )
1695 __kmp_itt_task_finished( itt_sync_obj );
1696#endif /* USE_ITT_BUILD */
1697
1698 // Try stealing from this victim again, in the future.
1699 if (first) {
1700 threads_data[ tid ].td.td_deque_last_stolen = k;
1701 first = FALSE;
1702 }
1703
1704 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001705 if (flag == NULL || (!final_spin && flag->done_check())) {
1706 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707 gtid) );
1708 return TRUE;
1709 }
1710 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1711
1712 // If the execution of the stolen task resulted in more tasks being
1713 // placed on our run queue, then restart the whole process.
1714 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001715 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001716 gtid) );
1717 goto start;
1718 }
1719 }
1720
1721        // The victim's work queue is empty. If we are in the final spin loop
1722 // of the barrier, check and see if the termination condition is satisfied.
1723 // Going on and finding a new victim to steal from is expensive, as it
1724 // involves a lot of cache misses, so we definitely want to re-check the
1725 // termination condition before doing that.
1726 if (final_spin) {
1727 // First, decrement the #unfinished threads, if that has not already
1728 // been done. This decrement might be to the spin location, and
1729 // result in the termination condition being satisfied.
1730 if (! *thread_finished) {
1731 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001732 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001733 "task_team=%p\n",
1734 gtid, count, task_team) );
1735 *thread_finished = TRUE;
1736 }
1737
1738 // If __kmp_tasking_mode != tskm_immediate_exec,
1739 // then it is now unsafe to reference thread->th.th_team !!!
1740 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1741 // thread to pass through the barrier, where it might reset each thread's
1742 // th.th_team field for the next parallel region.
1743 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001744 if (flag != NULL && flag->done_check()) {
1745 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001746 return TRUE;
1747 }
1748 }
1749 }
1750
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001751 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001752 return FALSE;
1753}
1754
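// Thin wrappers that instantiate the template above for each flag type used by
// the wait/release machinery (32-bit, 64-bit, and oncore flags).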
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001755int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1756 int *thread_finished
1757 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1758{
1759 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1760 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1761}
1762
1763int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1764 int *thread_finished
1765 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1766{
1767 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1768 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1769}
1770
1771int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1772 int *thread_finished
1773 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1774{
1775 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1776 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1777}
1778
1779
Jim Cownie5e8470a2013-09-27 10:38:44 +00001780
1781//-----------------------------------------------------------------------------
1782// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1783// next barrier so they can assist in executing enqueued tasks.
1784// First thread in allocates the task team atomically.
1785
1786static void
1787__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1788{
1789 kmp_team_t *team = this_thr->th.th_team;
1790 kmp_thread_data_t *threads_data;
1791 int nthreads, i, is_init_thread;
1792
1793 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1794 __kmp_gtid_from_thread( this_thr ) ) );
1795
1796 KMP_DEBUG_ASSERT(task_team != NULL);
1797 KMP_DEBUG_ASSERT(team != NULL);
1798
1799 nthreads = task_team->tt.tt_nproc;
1800 KMP_DEBUG_ASSERT(nthreads > 0);
1801 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1802
1803 // Allocate or increase the size of threads_data if necessary
1804 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1805
1806 if (!is_init_thread) {
1807 // Some other thread already set up the array.
1808 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1809 __kmp_gtid_from_thread( this_thr ) ) );
1810 return;
1811 }
1812 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1813 KMP_DEBUG_ASSERT( threads_data != NULL );
1814
1815 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1816 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1817 {
1818 // Release any threads sleeping at the barrier, so that they can steal
1819 // tasks and execute them. In extra barrier mode, tasks do not sleep
1820 // at the separate tasking barrier, so this isn't a problem.
1821 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001822 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001823 kmp_info_t *thread = threads_data[i].td.td_thr;
1824
1825 if (i == this_thr->th.th_info.ds.ds_tid) {
1826 continue;
1827 }
1828            // Since we haven't locked the thread's suspend mutex at this
1829 // point, there is a small window where a thread might be putting
1830 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001831 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832            // to see if other threads are sleeping (using the same random
1833 // mechanism that is used for task stealing) and awakens them if
1834 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001835 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001836 {
1837 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1838 __kmp_gtid_from_thread( this_thr ),
1839 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001840 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001841 }
1842 else {
1843 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1844 __kmp_gtid_from_thread( this_thr ),
1845 __kmp_gtid_from_thread( thread ) ) );
1846 }
1847 }
1848 }
1849
1850 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1851 __kmp_gtid_from_thread( this_thr ) ) );
1852}
1853
1854
1855/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001856/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00001857 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
1858 * like a shadow of the kmp_team_t data struct, with a different lifetime.
1859 * After a child thread checks into a barrier and calls __kmp_release() from
1860 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1861 * longer assume that the kmp_team_t structure is intact (at any moment, the
1862 * master thread may exit the barrier code and free the team data structure,
1863 * and return the threads to the thread pool).
1864 *
1865 * This does not work with the tasking code, as the thread is still
1866 * expected to participate in the execution of any tasks that may have been
1867 * spawned by a member of the team, and the thread still needs access
1868 * to each thread in the team, so that it can steal work from it.
1869 *
1870 * Enter the existence of the kmp_task_team_t struct. It employs a reference
1871 * counting mechanism, and is allocated by the master thread before calling
1872 * __kmp_<barrier_kind>_release, and then is released by the last thread to
1873 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1874 * of the kmp_task_team_t structs for consecutive barriers can overlap
1875 * (and will, unless the master thread is the last thread to exit the barrier
1876 * release phase, which is not typical).
1877 *
1878 * The existence of such a struct is useful outside the context of tasking,
1879 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1880 * so that any performance differences show up when comparing the 2.5 vs. 3.0
1881 * libraries.
1882 *
1883 * We currently use the existence of the threads array as an indicator that
1884 * tasks were spawned since the last barrier. If the structure is to be
1885 * useful outside the context of tasking, then this will have to change, but
1886 * not setting the field minimizes the performance impact of tasking on
1887 * barriers, when no explicit tasks were spawned (pushed, actually).
1888 */
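/* A rough sketch of the lifecycle as implemented below: the master allocates a
 * task team in __kmp_task_team_setup() (reusing one from the free list kept by
 * __kmp_allocate_task_team() when possible); workers pick up the pointer in
 * __kmp_task_team_sync() after the barrier release; the master waits for all
 * tasks in __kmp_task_team_wait() and deactivates the struct; the last thread
 * to drop its reference in __kmp_unref_task_team() returns it to the free list
 * via __kmp_free_task_team(). */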
1889
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001890
Jim Cownie5e8470a2013-09-27 10:38:44 +00001891static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1892// Lock for task team data structures
1893static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1894
1895
1896//------------------------------------------------------------------------------
1897// __kmp_alloc_task_deque:
1898 // Allocates a task deque for a particular thread, and initializes the necessary
1899// data structures relating to the deque. This only happens once per thread
1900// per task team since task teams are recycled.
1901// No lock is needed during allocation since each thread allocates its own
1902// deque.
1903
1904static void
1905__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1906{
1907 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1908 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1909
1910 // Initialize last stolen task field to "none"
1911 thread_data -> td.td_deque_last_stolen = -1;
1912
1913 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1914 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1915 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1916
1917 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1918 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1919 // Allocate space for task deque, and zero the deque
1920 // Cannot use __kmp_thread_calloc() because threads not around for
1921 // kmp_reap_task_team( ).
1922 thread_data -> td.td_deque = (kmp_taskdata_t **)
1923 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1924}
1925
1926
1927//------------------------------------------------------------------------------
1928// __kmp_free_task_deque:
1929// Deallocates a task deque for a particular thread.
1930// Happens at library deallocation so don't need to reset all thread data fields.
1931
1932static void
1933__kmp_free_task_deque( kmp_thread_data_t *thread_data )
1934{
1935 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1936
1937 if ( thread_data -> td.td_deque != NULL ) {
1938 TCW_4(thread_data -> td.td_deque_ntasks, 0);
1939 __kmp_free( thread_data -> td.td_deque );
1940 thread_data -> td.td_deque = NULL;
1941 }
1942 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1943
1944#ifdef BUILD_TIED_TASK_STACK
1945 // GEH: Figure out what to do here for td_susp_tied_tasks
1946 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1947 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1948 }
1949#endif // BUILD_TIED_TASK_STACK
1950}
1951
1952
1953//------------------------------------------------------------------------------
1954// __kmp_realloc_task_threads_data:
1955// Allocates a threads_data array for a task team, either by allocating an initial
1956// array or enlarging an existing array. Only the first thread to get the lock
1957 // allocates or enlarges the array and re-initializes the array elements.
1958// That thread returns "TRUE", the rest return "FALSE".
1959// Assumes that the new array size is given by task_team -> tt.tt_nproc.
1960// The current size is given by task_team -> tt.tt_max_threads.
1961
1962static int
1963__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1964{
1965 kmp_thread_data_t ** threads_data_p;
1966 kmp_int32 nthreads, maxthreads;
1967 int is_init_thread = FALSE;
1968
1969 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1970 // Already reallocated and initialized.
1971 return FALSE;
1972 }
1973
1974 threads_data_p = & task_team -> tt.tt_threads_data;
1975 nthreads = task_team -> tt.tt_nproc;
1976 maxthreads = task_team -> tt.tt_max_threads;
1977
1978 // All threads must lock when they encounter the first task of the implicit task
1979 // region to make sure threads_data fields are (re)initialized before used.
1980 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1981
1982 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1983 // first thread to enable tasking
1984 kmp_team_t *team = thread -> th.th_team;
1985 int i;
1986
1987 is_init_thread = TRUE;
1988 if ( maxthreads < nthreads ) {
1989
1990 if ( *threads_data_p != NULL ) {
1991 kmp_thread_data_t *old_data = *threads_data_p;
1992 kmp_thread_data_t *new_data = NULL;
1993
1994 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1995 "threads data for task_team %p, new_size = %d, old_size = %d\n",
1996 __kmp_gtid_from_thread( thread ), task_team,
1997 nthreads, maxthreads ) );
1998 // Reallocate threads_data to have more elements than current array
1999 // Cannot use __kmp_thread_realloc() because threads not around for
2000 // kmp_reap_task_team( ). Note all new array entries are initialized
2001 // to zero by __kmp_allocate().
2002 new_data = (kmp_thread_data_t *)
2003 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2004 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002005 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
2006 (void *) old_data,
2007                              maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008
2009#ifdef BUILD_TIED_TASK_STACK
2010 // GEH: Figure out if this is the right thing to do
2011 for (i = maxthreads; i < nthreads; i++) {
2012 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2013 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2014 }
2015#endif // BUILD_TIED_TASK_STACK
2016 // Install the new data and free the old data
2017 (*threads_data_p) = new_data;
2018 __kmp_free( old_data );
2019 }
2020 else {
2021 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2022 "threads data for task_team %p, size = %d\n",
2023 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2024 // Make the initial allocate for threads_data array, and zero entries
2025 // Cannot use __kmp_thread_calloc() because threads not around for
2026 // kmp_reap_task_team( ).
2027 *threads_data_p = (kmp_thread_data_t *)
2028 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2029#ifdef BUILD_TIED_TASK_STACK
2030 // GEH: Figure out if this is the right thing to do
2031 for (i = 0; i < nthreads; i++) {
2032 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2033 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2034 }
2035#endif // BUILD_TIED_TASK_STACK
2036 }
2037 task_team -> tt.tt_max_threads = nthreads;
2038 }
2039 else {
2040 // If array has (more than) enough elements, go ahead and use it
2041 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2042 }
2043
2044 // initialize threads_data pointers back to thread_info structures
2045 for (i = 0; i < nthreads; i++) {
2046 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2047 thread_data -> td.td_thr = team -> t.t_threads[i];
2048
2049 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2050 // The last stolen field survives across teams / barrier, and the number
2051 // of threads may have changed. It's possible (likely?) that a new
2052 // parallel region will exhibit the same behavior as the previous region.
2053 thread_data -> td.td_deque_last_stolen = -1;
2054 }
2055 }
2056
2057 KMP_MB();
2058 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2059 }
2060
2061 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2062 return is_init_thread;
2063}
2064
2065
2066//------------------------------------------------------------------------------
2067// __kmp_free_task_threads_data:
2068// Deallocates a threads_data array for a task team, including any attached
2069// tasking deques. Only occurs at library shutdown.
2070
2071static void
2072__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2073{
2074 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2075 if ( task_team -> tt.tt_threads_data != NULL ) {
2076 int i;
2077 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2078 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2079 }
2080 __kmp_free( task_team -> tt.tt_threads_data );
2081 task_team -> tt.tt_threads_data = NULL;
2082 }
2083 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2084}
2085
2086
2087//------------------------------------------------------------------------------
2088// __kmp_allocate_task_team:
2089// Allocates a task team associated with a specific team, taking it from
2090// the global task team free list if possible. Also initializes data structures.
2091
2092static kmp_task_team_t *
2093__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2094{
2095 kmp_task_team_t *task_team = NULL;
2096 int nthreads;
2097
2098 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2099 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2100
2101 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2102 // Take a task team from the task team pool
2103 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2104 if (__kmp_free_task_teams != NULL) {
2105 task_team = __kmp_free_task_teams;
2106 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2107 task_team -> tt.tt_next = NULL;
2108 }
2109 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2110 }
2111
2112 if (task_team == NULL) {
2113 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2114 "task team for team %p\n",
2115 __kmp_gtid_from_thread( thread ), team ) );
2116 // Allocate a new task team if one is not available.
2117 // Cannot use __kmp_thread_malloc() because threads not around for
2118 // kmp_reap_task_team( ).
2119 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2120 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2121 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2122 //task_team -> tt.tt_max_threads = 0;
2123 //task_team -> tt.tt_next = NULL;
2124 }
2125
2126 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2127 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2128
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2130 TCW_4( task_team -> tt.tt_active, TRUE );
2131 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
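    // Note: the reference count excludes the master thread; see __kmp_task_team_wait().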
2132
2133 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2134 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2135 return task_team;
2136}
2137
2138
2139//------------------------------------------------------------------------------
2140// __kmp_free_task_team:
2141// Frees the task team associated with a specific thread, and adds it
2142// to the global task team free list.
2143//
2144
2145static void
2146__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2147{
2148 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2149 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2150
2151 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2152
2153 // Put task team back on free list
2154 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2155
2156 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2157 task_team -> tt.tt_next = __kmp_free_task_teams;
2158 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2159 TCW_PTR(__kmp_free_task_teams, task_team);
2160
2161 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2162}
2163
2164
2165//------------------------------------------------------------------------------
2166// __kmp_reap_task_teams:
2167// Free all the task teams on the task team free list.
2168// Should only be done during library shutdown.
2169// Cannot do anything that needs a thread structure or gtid since they are already gone.
2170
2171void
2172__kmp_reap_task_teams( void )
2173{
2174 kmp_task_team_t *task_team;
2175
2176 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2177 // Free all task_teams on the free list
2178 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2179 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2180 __kmp_free_task_teams = task_team -> tt.tt_next;
2181 task_team -> tt.tt_next = NULL;
2182
2183 // Free threads_data if necessary
2184 if ( task_team -> tt.tt_threads_data != NULL ) {
2185 __kmp_free_task_threads_data( task_team );
2186 }
2187 __kmp_free( task_team );
2188 }
2189 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2190 }
2191}
2192
2193
2194//------------------------------------------------------------------------------
2195 // __kmp_unref_task_team:
2196 // Remove one thread from referencing the task team structure by
2197 // decreasing the reference count, and deallocate the task team if there are
2198 // no more references to it.
2199//
2200void
2201__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2202{
2203 kmp_uint ref_ct;
2204
2205 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2206
2207 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2208 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2209
2210
2211 if ( ref_ct == 0 ) {
2212 __kmp_free_task_team( thread, task_team );
2213 }
2214
2215 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2216}
2217
2218
2219//------------------------------------------------------------------------------
2220// __kmp_wait_to_unref_task_teams:
2221// Some threads could still be in the fork barrier release code, possibly
2222// trying to steal tasks. Wait for each thread to unreference its task team.
2223//
2224void
2225__kmp_wait_to_unref_task_teams(void)
2226{
2227 kmp_info_t *thread;
2228 kmp_uint32 spins;
2229 int done;
2230
2231 KMP_INIT_YIELD( spins );
2232
2233
2234 for (;;) {
2235 done = TRUE;
2236
2237        // TODO: GEH - this may be wrong because some sync would be necessary
2238        // in case threads are added to the pool during the traversal.
2239        // Need to verify that the lock for the thread pool is held when calling
2240 // this routine.
2241 for (thread = (kmp_info_t *)__kmp_thread_pool;
2242 thread != NULL;
2243 thread = thread->th.th_next_pool)
2244 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002245#if KMP_OS_WINDOWS
2246 DWORD exit_val;
2247#endif
2248 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2249 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2250 __kmp_gtid_from_thread( thread ) ) );
2251 continue;
2252 }
2253#if KMP_OS_WINDOWS
2254 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2255 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2256 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2257 __kmp_unref_task_team( thread->th.th_task_team, thread );
2258 }
2259 continue;
2260 }
2261#endif
2262
2263 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2264
2265 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2266 __kmp_gtid_from_thread( thread ) ) );
2267
2268 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002269 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002270 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002271 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002272 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2273 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002274 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002275 }
2276 }
2277 }
2278 if (done) {
2279 break;
2280 }
2281
2282 // If we are oversubscribed,
2283 // or have waited a bit (and library mode is throughput), yield.
2284 // Pause is in the following code.
2285 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2286 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2287 }
2288
2289
2290}
2291
2292
2293//------------------------------------------------------------------------------
2294// __kmp_task_team_setup: Create a task_team for the current team, but use
2295// an already created, unused one if it already exists.
2296// This may be called by any thread, but only for teams with # threads >1.
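// Two task team slots (t_task_team[0] and t_task_team[1]) are kept per team so
// that the task teams of consecutive barrier regions can overlap; each thread's
// th_task_state selects the slot it is currently working out of.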
Jim Cownie5e8470a2013-09-27 10:38:44 +00002297void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002298__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002299{
2300 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2301
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002302 if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( team->t.t_nproc > 1 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002303 // Allocate a new task team, which will be propagated to
2304 // all of the worker threads after the barrier. As they
2305        // spin in the barrier release phase, they will continue
2306 // to use the previous task team struct, until they receive
2307 // the signal to stop checking for tasks (they can't safely
2308 // reference the kmp_team_t struct, which could be reallocated
2309 // by the master thread).
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002310 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2311 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2312 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie5e8470a2013-09-27 10:38:44 +00002313 ((team != NULL) ? team->t.t_id : -1)) );
2314 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002315 //else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002316 // All threads have reported in, and no tasks were spawned
2317 // for this release->gather region. Leave the old task
2318 // team struct in place for the upcoming region. No task
2319 // teams are formed for serialized teams.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002320 if (both) {
2321 int other_team = 1 - this_thr->th.th_task_state;
2322 if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2323 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2324 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2325 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2326 ((team != NULL) ? team->t.t_id : -1)) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002327 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002328 }
2329}
2330
2331
2332//------------------------------------------------------------------------------
2333// __kmp_task_team_sync: Propagation of task team data from team to threads
2334// which happens just after the release phase of a team barrier. This may be
2335// called by any thread, but only for teams with # threads > 1.
2336
2337void
2338__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2339{
2340 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2341
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002342 // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343 if ( this_thr->th.th_task_team != NULL ) {
2344 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2345 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2346 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002347 } else { // We are re-using a task team that was never enabled.
2348 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002349 }
2350 }
2351
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002352 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002353 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002354 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2355 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002356 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2357 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2358 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2359}
2360
2361
2362//------------------------------------------------------------------------------
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002363// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
2364// barrier gather phase. Only called by master thread if #threads in team > 1 !
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002366__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002367 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368 )
2369{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002370 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002371
2372 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2373 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2374
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002375 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002376 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2377 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002378 // All worker threads might have dropped through to the release phase, but could still
2379 // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2380 // only the master thread checks for the termination condition.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002381 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2382 flag.wait(this_thr, TRUE
2383 USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002384
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002385 // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2386 // They will deallocate it when the reference count reaches zero.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002387 // The master thread is not included in the ref count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002388 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2389 __kmp_gtid_from_thread( this_thr ), task_team ) );
2390 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2391 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2392 KMP_MB();
2393
2394 TCW_PTR(this_thr->th.th_task_team, NULL);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002395 team->t.t_task_team[this_thr->th.th_task_state] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396 }
2397}
2398
2399
2400//------------------------------------------------------------------------------
2401// __kmp_tasking_barrier:
2402// Internal function to execute all tasks prior to a regular barrier or a
2403// join barrier. It is a full barrier itself, which unfortunately turns
2404// regular barriers into double barriers and join barriers into 1 1/2
2405// barriers.
2406 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2407
2408void
2409__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2410{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002411 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002412 int flag = FALSE;
2413 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2414
2415#if USE_ITT_BUILD
2416 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2417#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002418 kmp_flag_32 spin_flag(spin, 0U);
2419 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2420 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002421#if USE_ITT_BUILD
2422 // TODO: What about itt_sync_obj??
2423 KMP_FSYNC_SPIN_PREPARE( spin );
2424#endif /* USE_ITT_BUILD */
2425
2426 if( TCR_4(__kmp_global.g.g_done) ) {
2427 if( __kmp_global.g.g_abort )
2428 __kmp_abort_thread( );
2429 break;
2430 }
2431 KMP_YIELD( TRUE ); // GH: We always yield here
2432 }
2433#if USE_ITT_BUILD
2434 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2435#endif /* USE_ITT_BUILD */
2436}
2437