     1/*
     2 * kmp_tasking.c -- OpenMP 3.0 tasking support.
     3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
    19#include "kmp_wait_release.h"
    20
    21#if OMPT_SUPPORT
    22#include "ompt-specific.h"
    23#endif
    24
    25
26/* ------------------------------------------------------------------------ */
27/* ------------------------------------------------------------------------ */
28
29
30/* forward declaration */
31static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
32static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
33static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
34
    35static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
36 switch (((kmp_flag_64 *)flag)->get_type()) {
37 case flag32: __kmp_resume_32(gtid, NULL); break;
38 case flag64: __kmp_resume_64(gtid, NULL); break;
39 case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
40 }
    41}
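//
// Usage sketch (illustrative only): a producer typically wakes a sleeping
// worker whose flag type it does not know statically, e.g.
//
//     volatile void *sleep_loc = TCR_PTR(other_thread->th.th_sleep_loc);
//     if ( sleep_loc != NULL )
//         __kmp_null_resume_wrapper( __kmp_gtid_from_thread(other_thread), sleep_loc );
//
// The field and helper names above are assumptions used for illustration, not
// guarantees about the rest of the runtime.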
42
43#ifdef BUILD_TIED_TASK_STACK
44
45//---------------------------------------------------------------------------
46// __kmp_trace_task_stack: print the tied tasks from the task stack in order
47// from top do bottom
48//
49// gtid: global thread identifier for thread containing stack
50// thread_data: thread data for task team thread containing stack
51// threshold: value above which the trace statement triggers
52// location: string identifying call site of this function (for trace)
53
54static void
55__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
56{
57 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
58 kmp_taskdata_t **stack_top = task_stack -> ts_top;
59 kmp_int32 entries = task_stack -> ts_entries;
60 kmp_taskdata_t *tied_task;
61
62 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
63 "first_block = %p, stack_top = %p \n",
64 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
65
66 KMP_DEBUG_ASSERT( stack_top != NULL );
67 KMP_DEBUG_ASSERT( entries > 0 );
68
69 while ( entries != 0 )
70 {
71 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
72 // fix up ts_top if we need to pop from previous block
    73        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
74 {
75 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
76
77 stack_block = stack_block -> sb_prev;
78 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
79 }
80
81 // finish bookkeeping
82 stack_top--;
83 entries--;
84
85 tied_task = * stack_top;
86
87 KMP_DEBUG_ASSERT( tied_task != NULL );
88 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
89
90 KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
91 "stack_top=%p, tied_task=%p\n",
92 location, gtid, entries, stack_top, tied_task ) );
93 }
94 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
95
96 KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
97 location, gtid ) );
98}
99
100//---------------------------------------------------------------------------
101// __kmp_init_task_stack: initialize the task stack for the first time
102// after a thread_data structure is created.
103// It should not be necessary to do this again (assuming the stack works).
104//
105// gtid: global thread identifier of calling thread
106// thread_data: thread data for task team thread containing stack
107
108static void
109__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
110{
111 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
112 kmp_stack_block_t *first_block;
113
114 // set up the first block of the stack
115 first_block = & task_stack -> ts_first_block;
116 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
117 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
118
119 // initialize the stack to be empty
120 task_stack -> ts_entries = TASK_STACK_EMPTY;
121 first_block -> sb_next = NULL;
122 first_block -> sb_prev = NULL;
123}
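//
// Layout note (informal): the suspended-tied-task stack is a linked list of
// fixed-size blocks.  ts_first_block lives inside the thread data; additional
// kmp_stack_block_t nodes are chained via sb_next/sb_prev as the stack grows,
// and ts_top always points at the next free kmp_taskdata_t* slot.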
124
125
126//---------------------------------------------------------------------------
127// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
128//
129// gtid: global thread identifier for calling thread
130// thread_data: thread info for thread containing stack
131
132static void
133__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
134{
135 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
136 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
137
138 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
139 // free from the second block of the stack
140 while ( stack_block != NULL ) {
141 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
142
143 stack_block -> sb_next = NULL;
144 stack_block -> sb_prev = NULL;
145 if (stack_block != & task_stack -> ts_first_block) {
   146            __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
147 }
148 stack_block = next_block;
149 }
150 // initialize the stack to be empty
151 task_stack -> ts_entries = 0;
152 task_stack -> ts_top = NULL;
153}
154
155
156//---------------------------------------------------------------------------
157// __kmp_push_task_stack: Push the tied task onto the task stack.
158// Grow the stack if necessary by allocating another block.
159//
160// gtid: global thread identifier for calling thread
161// thread: thread info for thread containing stack
162// tied_task: the task to push on the stack
163
164static void
165__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
166{
167 // GEH - need to consider what to do if tt_threads_data not allocated yet
168 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
169 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
170 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
171
172 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
173 return; // Don't push anything on stack if team or team tasks are serialized
174 }
175
176 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
177 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
178
179 KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
180 gtid, thread, tied_task ) );
181 // Store entry
182 * (task_stack -> ts_top) = tied_task;
183
184 // Do bookkeeping for next push
185 task_stack -> ts_top++;
186 task_stack -> ts_entries++;
187
   188    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
189 {
190 // Find beginning of this task block
191 kmp_stack_block_t *stack_block =
192 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
193
194 // Check if we already have a block
195 if ( stack_block -> sb_next != NULL )
196 { // reset ts_top to beginning of next block
197 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
198 }
199 else
200 { // Alloc new block and link it up
201 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
202 __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
203
204 task_stack -> ts_top = & new_block -> sb_block[0];
205 stack_block -> sb_next = new_block;
206 new_block -> sb_prev = stack_block;
207 new_block -> sb_next = NULL;
208
209 KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
210 gtid, tied_task, new_block ) );
211 }
212 }
213 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
214}
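//
// Worked example (constants assumed for illustration): with a block size of 8
// and TASK_STACK_INDEX_MASK == 7, pushing the 8th entry leaves
// (ts_entries & TASK_STACK_INDEX_MASK) == 0, meaning ts_top has just run off
// the end of the current sb_block[]; the code above then redirects ts_top to
// the first slot of the next block, allocating that block on first use.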
215
216//---------------------------------------------------------------------------
217// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
218// the task, just check to make sure it matches the ending task passed in.
219//
220// gtid: global thread identifier for the calling thread
221// thread: thread info structure containing stack
222// tied_task: the task popped off the stack
223// ending_task: the task that is ending (should match popped task)
224
225static void
226__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
227{
228 // GEH - need to consider what to do if tt_threads_data not allocated yet
   229    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
230 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
231 kmp_taskdata_t *tied_task;
232
233 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
234 return; // Don't pop anything from stack if team or team tasks are serialized
235 }
236
237 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
238 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
239
240 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
241
242 // fix up ts_top if we need to pop from previous block
   243    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
244 {
245 kmp_stack_block_t *stack_block =
246 (kmp_stack_block_t *) (task_stack -> ts_top) ;
247
248 stack_block = stack_block -> sb_prev;
249 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
250 }
251
252 // finish bookkeeping
253 task_stack -> ts_top--;
254 task_stack -> ts_entries--;
255
256 tied_task = * (task_stack -> ts_top );
257
258 KMP_DEBUG_ASSERT( tied_task != NULL );
259 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
260 KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
261
262 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
263 return;
264}
265#endif /* BUILD_TIED_TASK_STACK */
266
267//---------------------------------------------------
268// __kmp_push_task: Add a task to the thread's deque
269
270static kmp_int32
271__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
272{
273 kmp_info_t * thread = __kmp_threads[ gtid ];
274 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
275 kmp_task_team_t * task_team = thread->th.th_task_team;
276 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
277 kmp_thread_data_t * thread_data;
278
279 KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
280
281 // The first check avoids building task_team thread data if serialized
282 if ( taskdata->td_flags.task_serial ) {
283 KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
284 gtid, taskdata ) );
285 return TASK_NOT_PUSHED;
286 }
287
288 // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
289 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
   290    if ( ! KMP_TASKING_ENABLED(task_team) ) {
   291        __kmp_enable_tasking( task_team, thread );
292 }
293 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
294 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
295
296 // Find tasking deque specific to encountering thread
297 thread_data = & task_team -> tt.tt_threads_data[ tid ];
298
299 // No lock needed since only owner can allocate
300 if (thread_data -> td.td_deque == NULL ) {
301 __kmp_alloc_task_deque( thread, thread_data );
302 }
303
304 // Check if deque is full
305 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
306 {
307 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
308 gtid, taskdata ) );
309 return TASK_NOT_PUSHED;
310 }
311
312 // Lock the deque for the task push operation
313 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
314
   315    // Must have room since no thread other than the calling thread can add tasks
316 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
317
318 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
319 // Wrap index.
320 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
321 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
322
323 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
324
325 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
326 "task=%p ntasks=%d head=%u tail=%u\n",
327 gtid, taskdata, thread_data->td.td_deque_ntasks,
328 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
329
330 return TASK_SUCCESSFULLY_PUSHED;
331}
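//
// Ring-buffer sketch (illustrative): TASK_DEQUE_SIZE is assumed to be a power
// of two with TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1, so advancing an index is
// a masked increment rather than a modulo:
//
//     tail = ( tail + 1 ) & TASK_DEQUE_MASK;     // e.g. 255 -> 0 for size 256
//
// Only the owning thread pushes to its deque; the bootstrap lock keeps the
// tail update consistent with concurrent stealers that move td_deque_head.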
332
333
334//-----------------------------------------------------------------------------------------
335// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
336// this_thr: thread structure to set current_task in.
337
338void
339__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
340{
341 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
342 "curtask_parent=%p\n",
343 0, this_thr, this_thr -> th.th_current_task,
344 this_thr -> th.th_current_task -> td_parent ) );
345
346 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
347
348 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
349 "curtask_parent=%p\n",
350 0, this_thr, this_thr -> th.th_current_task,
351 this_thr -> th.th_current_task -> td_parent ) );
352}
353
354
355//---------------------------------------------------------------------------------------
356// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
357// this_thr: thread structure to set up
358// team: team for implicit task data
359// tid: thread within team to set up
360
361void
362__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
363{
   364    // The current task of this thread is the parent of the just-created implicit tasks of the new team
365 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
366 "parent_task=%p\n",
367 tid, this_thr, this_thr->th.th_current_task,
368 team->t.t_implicit_task_taskdata[tid].td_parent ) );
369
370 KMP_DEBUG_ASSERT (this_thr != NULL);
371
372 if( tid == 0 ) {
373 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
374 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
375 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
376 }
377 } else {
378 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
379 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
380 }
381
382 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
383 "parent_task=%p\n",
384 tid, this_thr, this_thr->th.th_current_task,
385 team->t.t_implicit_task_taskdata[tid].td_parent ) );
386}
387
388
389//----------------------------------------------------------------------
390// __kmp_task_start: bookkeeping for a task starting execution
391// GTID: global thread id of calling thread
392// task: task starting execution
393// current_task: task suspending
394
395static void
396__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
397{
398 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
399 kmp_info_t * thread = __kmp_threads[ gtid ];
400
401 KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
402 gtid, taskdata, current_task) );
403
404 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
405
406 // mark currently executing task as suspended
407 // TODO: GEH - make sure root team implicit task is initialized properly.
408 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
409 current_task -> td_flags.executing = 0;
410
411 // Add task to stack if tied
412#ifdef BUILD_TIED_TASK_STACK
413 if ( taskdata -> td_flags.tiedness == TASK_TIED )
414 {
415 __kmp_push_task_stack( gtid, thread, taskdata );
416 }
417#endif /* BUILD_TIED_TASK_STACK */
418
419 // mark starting task as executing and as current task
420 thread -> th.th_current_task = taskdata;
421
422 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
423 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
424 taskdata -> td_flags.started = 1;
425 taskdata -> td_flags.executing = 1;
426 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
427 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
428
429 // GEH TODO: shouldn't we pass some sort of location identifier here?
430 // APT: yes, we will pass location here.
431 // need to store current thread state (in a thread or taskdata structure)
432 // before setting work_state, otherwise wrong state is set after end of task
433
434 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
435 gtid, taskdata ) );
436
   437#if OMPT_SUPPORT
438 if ((ompt_status == ompt_status_track_callback) &&
439 ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
440 kmp_taskdata_t *parent = taskdata->td_parent;
441 ompt_callbacks.ompt_callback(ompt_event_task_begin)(
442 parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
443 parent ? &(parent->ompt_task_info.frame) : NULL,
444 taskdata->ompt_task_info.task_id,
445 taskdata->ompt_task_info.function);
446 }
447#endif
448
   449    return;
450}
451
452
453//----------------------------------------------------------------------
454// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
455// loc_ref: source location information; points to beginning of task block.
456// gtid: global thread number.
457// task: task thunk for the started task.
458
459void
460__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
461{
462 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
463 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
464
465 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
466 gtid, loc_ref, taskdata, current_task ) );
467
468 taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
469 __kmp_task_start( gtid, task, current_task );
470
471 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
472 gtid, loc_ref, taskdata ) );
473
474 return;
475}
476
477#ifdef TASK_UNUSED
478//----------------------------------------------------------------------
479// __kmpc_omp_task_begin: report that a given task has started execution
480// NEVER GENERATED BY COMPILER, DEPRECATED!!!
481
482void
483__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
484{
485 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
486
487 KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
488 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
489
490 __kmp_task_start( gtid, task, current_task );
491
492 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
493 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
494
495 return;
496}
497#endif // TASK_UNUSED
498
499
500//-------------------------------------------------------------------------------------
501// __kmp_free_task: free the current task space and the space for shareds
502// gtid: Global thread ID of calling thread
503// taskdata: task to free
504// thread: thread data structure of caller
505
506static void
507__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
508{
509 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
510 gtid, taskdata) );
511
512 // Check to make sure all flags and counters have the correct values
513 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
514 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
515 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
516 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
517 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
518 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
519
520 taskdata->td_flags.freed = 1;
521 // deallocate the taskdata and shared variable blocks associated with this task
522 #if USE_FAST_MEMORY
523 __kmp_fast_free( thread, taskdata );
524 #else /* ! USE_FAST_MEMORY */
525 __kmp_thread_free( thread, taskdata );
526 #endif
527
528 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
529 gtid, taskdata) );
530}
531
532//-------------------------------------------------------------------------------------
533// __kmp_free_task_and_ancestors: free the current task and ancestors without children
534//
535// gtid: Global thread ID of calling thread
536// taskdata: task to free
537// thread: thread data structure of caller
538
539static void
540__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
541{
542 kmp_int32 children = 0;
543 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
544
545 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
546
547 if ( !team_or_tasking_serialized ) {
548 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
549 KMP_DEBUG_ASSERT( children >= 0 );
550 }
551
552 // Now, go up the ancestor tree to see if any ancestors can now be freed.
553 while ( children == 0 )
554 {
555 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
556
557 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
558 "and freeing itself\n", gtid, taskdata) );
559
560 // --- Deallocate my ancestor task ---
561 __kmp_free_task( gtid, taskdata, thread );
562
563 taskdata = parent_taskdata;
564
565 // Stop checking ancestors at implicit task or if tasking serialized
566 // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
567 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
568 return;
569
570 if ( !team_or_tasking_serialized ) {
571 // Predecrement simulated by "- 1" calculation
572 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
573 KMP_DEBUG_ASSERT( children >= 0 );
574 }
575 }
576
577 KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
578 "not freeing it yet\n", gtid, taskdata, children) );
579}
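//
// Counting sketch (informal): td_allocated_child_tasks starts at 1 for the
// task's own reference (see __kmp_task_alloc) and gains 1 per child allocated.
// Each decrement above removes either the task's own reference (when it
// finishes) or a freed child's reference (as the loop walks up), so a task is
// deallocated only once it has completed and all of its children are freed.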
580
581//---------------------------------------------------------------------
582// __kmp_task_finish: bookkeeping to do when a task finishes execution
583// gtid: global thread ID for calling thread
584// task: task to be finished
585// resumed_task: task to be resumed. (may be NULL if task is serialized)
586
587static void
588__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
589{
590 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
591 kmp_info_t * thread = __kmp_threads[ gtid ];
592 kmp_int32 children = 0;
593
   594#if OMPT_SUPPORT
595 if ((ompt_status == ompt_status_track_callback) &&
596 ompt_callbacks.ompt_callback(ompt_event_task_end)) {
597 kmp_taskdata_t *parent = taskdata->td_parent;
598 ompt_callbacks.ompt_callback(ompt_event_task_end)(
599 taskdata->ompt_task_info.task_id);
600 }
601#endif
602
   603    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
604 gtid, taskdata, resumed_task) );
605
606 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
607
608 // Pop task from stack if tied
609#ifdef BUILD_TIED_TASK_STACK
610 if ( taskdata -> td_flags.tiedness == TASK_TIED )
611 {
612 __kmp_pop_task_stack( gtid, thread, taskdata );
613 }
614#endif /* BUILD_TIED_TASK_STACK */
615
   616    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
   617    taskdata -> td_flags.complete = 1;   // mark the task as completed
618 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
619 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
620
621 // Only need to keep track of count if team parallel and tasking not serialized
622 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
623 // Predecrement simulated by "- 1" calculation
624 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
625 KMP_DEBUG_ASSERT( children >= 0 );
626#if OMP_40_ENABLED
627 if ( taskdata->td_taskgroup )
628 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
   629        __kmp_release_deps(gtid,taskdata);
   630#endif
631 }
632
   633    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
   634    // Otherwise, if a task is executed immediately from the release_deps code,
   635    // the flag will be reset to 1 again by this same function.
   636    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
   637    taskdata -> td_flags.executing = 0;  // suspend the finishing task
   638
   639    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
640 gtid, taskdata, children) );
641
   642#if OMP_40_ENABLED
   643    /* If the task's destructor thunk flag has been set, we need to invoke the
   644       destructor thunk that has been generated by the compiler.
   645       The code is placed here, since at this point other tasks might have been released,
   646       hence overlapping the destructor invocations with some other work in the
   647       released tasks.  The OpenMP spec is not specific about when the destructors are
   648       invoked, so we should be free to choose.
   649    */
650 if (taskdata->td_flags.destructors_thunk) {
651 kmp_routine_entry_t destr_thunk = task->destructors;
652 KMP_ASSERT(destr_thunk);
653 destr_thunk(gtid, task);
654 }
655#endif // OMP_40_ENABLED
656
   657    // bookkeeping for resuming task:
658 // GEH - note tasking_ser => task_serial
659 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
660 taskdata->td_flags.task_serial);
661 if ( taskdata->td_flags.task_serial )
662 {
663 if (resumed_task == NULL) {
664 resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
665 }
666 else {
667 // verify resumed task passed in points to parent
668 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
669 }
670 }
671 else {
   672        KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that the resumed task is passed as an argument
673 }
674
675 // Free this task and then ancestor tasks if they have no children.
676 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
677
   678    // FIXME johnmc: I think this statement should be before the last one so that if an
   679    // asynchronous inquiry peers into the runtime system it doesn't see the freed
   680    // task as the current task.
   681    __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
682
683 // TODO: GEH - make sure root team implicit task is initialized properly.
684 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
685 resumed_task->td_flags.executing = 1; // resume previous task
686
687 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
688 gtid, taskdata, resumed_task) );
689
690 return;
691}
692
693//---------------------------------------------------------------------
694// __kmpc_omp_task_complete_if0: report that a task has completed execution
695// loc_ref: source location information; points to end of task block.
696// gtid: global thread number.
697// task: task thunk for the completed task.
698
699void
700__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
701{
702 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
703 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
704
705 __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
706
707 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
708 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
709
710 return;
711}
712
713#ifdef TASK_UNUSED
714//---------------------------------------------------------------------
715// __kmpc_omp_task_complete: report that a task has completed execution
716// NEVER GENERATED BY COMPILER, DEPRECATED!!!
717
718void
719__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
720{
721 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
722 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
723
724 __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
725
726 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
727 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
728 return;
729}
730#endif // TASK_UNUSED
731
732
   733#if OMPT_SUPPORT
734//----------------------------------------------------------------------------------------------------
735// __kmp_task_init_ompt:
   736// Initialize OMPT fields maintained by a task.  Since the serial task is initialized before
   737// ompt_initialize is called, we do not yet know at that point whether OMPT will be used.
   738// This function provides the support needed to initialize OMPT for the serial task after
   739// the fact.
740
741void
742__kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
743{
744 task->ompt_task_info.task_id = __ompt_task_id_new(tid);
745 task->ompt_task_info.function = NULL;
746 task->ompt_task_info.frame = (ompt_frame_t) {
747 .exit_runtime_frame = NULL,
748 .reenter_runtime_frame = NULL
749 };
750}
751#endif
752
753
   754//----------------------------------------------------------------------------------------------------
755// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
756//
757// loc_ref: reference to source location of parallel region
758// this_thr: thread data structure corresponding to implicit task
759// team: team for this_thr
760// tid: thread id of given thread within team
761// set_curr_task: TRUE if need to push current task to thread
762// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
763// TODO: Get better loc_ref. Value passed in may be NULL
764
765void
766__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
767{
768 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
769
770 KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
771 tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
772
773 task->td_task_id = KMP_GEN_TASK_ID();
774 task->td_team = team;
775// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
776 task->td_ident = loc_ref;
777 task->td_taskwait_ident = NULL;
778 task->td_taskwait_counter = 0;
779 task->td_taskwait_thread = 0;
780
781 task->td_flags.tiedness = TASK_TIED;
782 task->td_flags.tasktype = TASK_IMPLICIT;
783 // All implicit tasks are executed immediately, not deferred
784 task->td_flags.task_serial = 1;
785 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
786 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
787
788 task->td_flags.started = 1;
789 task->td_flags.executing = 1;
790 task->td_flags.complete = 0;
791 task->td_flags.freed = 0;
792
   793#if OMP_40_ENABLED
   794    task->td_dephash = NULL;
   795    task->td_depnode = NULL;
   796#endif
   797
798 if (set_curr_task) { // only do this initialization the first time a thread is created
799 task->td_incomplete_child_tasks = 0;
800 task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
801#if OMP_40_ENABLED
802 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
803#endif
804 __kmp_push_current_task_to_thread( this_thr, team, tid );
805 } else {
806 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
807 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
808 }
809
   810#if OMPT_SUPPORT
811 __kmp_task_init_ompt(task, tid);
812#endif
813
   814    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
815 tid, team, task ) );
816}
817
   818// Round a size up to a multiple of val, which must be a power of two
819// Used to insert padding between structures co-allocated using a single malloc() call
820static size_t
821__kmp_round_up_to_val( size_t size, size_t val ) {
822 if ( size & ( val - 1 ) ) {
823 size &= ~ ( val - 1 );
824 if ( size <= KMP_SIZE_T_MAX - val ) {
825 size += val; // Round up if there is no overflow.
826 }; // if
827 }; // if
828 return size;
   829} // __kmp_round_up_to_val
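//
// Example: __kmp_round_up_to_val( 37, 8 ) yields 40, while an already aligned
// size such as 40 is returned unchanged; val must be a power of two for the
// mask arithmetic above to work.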
830
831
832//---------------------------------------------------------------------------------
833// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
834//
835// loc_ref: source location information
836// gtid: global thread number.
837// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
838// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
839// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
840// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
841// task_entry: Pointer to task code entry point generated by compiler.
842// returns: a pointer to the allocated kmp_task_t structure (task).
843
844kmp_task_t *
845__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
846 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
847 kmp_routine_entry_t task_entry )
848{
849 kmp_task_t *task;
850 kmp_taskdata_t *taskdata;
851 kmp_info_t *thread = __kmp_threads[ gtid ];
852 kmp_team_t *team = thread->th.th_team;
853 kmp_taskdata_t *parent_task = thread->th.th_current_task;
854 size_t shareds_offset;
855
856 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
857 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
858 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
859 sizeof_shareds, task_entry) );
860
861 if ( parent_task->td_flags.final ) {
862 if (flags->merged_if0) {
863 }
864 flags->final = 1;
865 }
866
867 // Calculate shared structure offset including padding after kmp_task_t struct
868 // to align pointers in shared struct
869 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
870 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
871
872 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
873 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
874 gtid, shareds_offset) );
875 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
876 gtid, sizeof_shareds) );
877
878 // Avoid double allocation here by combining shareds with taskdata
879 #if USE_FAST_MEMORY
880 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
881 #else /* ! USE_FAST_MEMORY */
882 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
883 #endif /* USE_FAST_MEMORY */
884
885 task = KMP_TASKDATA_TO_TASK(taskdata);
886
887 // Make sure task & taskdata are aligned appropriately
   888#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
   889    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
890 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
891#else
892 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
893 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
894#endif
895 if (sizeof_shareds > 0) {
896 // Avoid double allocation here by combining shareds with taskdata
897 task->shareds = & ((char *) taskdata)[ shareds_offset ];
898 // Make sure shareds struct is aligned to pointer size
899 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
900 } else {
901 task->shareds = NULL;
902 }
903 task->routine = task_entry;
904 task->part_id = 0; // AC: Always start with 0 part id
905
906 taskdata->td_task_id = KMP_GEN_TASK_ID();
907 taskdata->td_team = team;
   908    taskdata->td_alloc_thread = thread;
   909    taskdata->td_parent = parent_task;
910 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
911 taskdata->td_ident = loc_ref;
912 taskdata->td_taskwait_ident = NULL;
913 taskdata->td_taskwait_counter = 0;
914 taskdata->td_taskwait_thread = 0;
915 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
916 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
917
918 taskdata->td_flags.tiedness = flags->tiedness;
919 taskdata->td_flags.final = flags->final;
920 taskdata->td_flags.merged_if0 = flags->merged_if0;
   921#if OMP_40_ENABLED
922 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
923#endif // OMP_40_ENABLED
   924    taskdata->td_flags.tasktype = TASK_EXPLICIT;
925
926 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
927 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
928
929 // GEH - TODO: fix this to copy parent task's value of team_serial flag
930 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
931
932 // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
933 // tasks are not left until program termination to execute. Also, it helps locality to execute
934 // immediately.
935 taskdata->td_flags.task_serial = ( taskdata->td_flags.final
936 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
937
938 taskdata->td_flags.started = 0;
939 taskdata->td_flags.executing = 0;
940 taskdata->td_flags.complete = 0;
941 taskdata->td_flags.freed = 0;
942
943 taskdata->td_flags.native = flags->native;
944
945 taskdata->td_incomplete_child_tasks = 0;
946 taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
947#if OMP_40_ENABLED
948 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
949 taskdata->td_dephash = NULL;
950 taskdata->td_depnode = NULL;
   951#endif
   952    // Only need to keep track of child task counts if team parallel and tasking not serialized
953 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
954 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
955#if OMP_40_ENABLED
956 if ( parent_task->td_taskgroup )
957 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
958#endif
959 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
960 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
961 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
962 }
963 }
964
965 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
966 gtid, taskdata, taskdata->td_parent) );
967
   968#if OMPT_SUPPORT
969 if (ompt_status & ompt_status_track) {
970 taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
971 taskdata->ompt_task_info.function = (void*) task_entry;
972 taskdata->ompt_task_info.frame = (ompt_frame_t)
973 { .exit_runtime_frame = NULL, .reenter_runtime_frame = NULL };
974 }
975#endif
976
   977    return task;
978}
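//
// Resulting single-allocation layout (informal sketch):
//
//     +----------------+---------------------------------+---------+---------+
//     | kmp_taskdata_t | kmp_task_t (incl. private vars)  | padding | shareds |
//     +----------------+---------------------------------+---------+---------+
//     ^ taskdata        ^ task = KMP_TASKDATA_TO_TASK()             ^ task->shareds
//
// shareds_offset is rounded up to sizeof(void *) so pointers stored in the
// shareds block stay naturally aligned.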
979
980
981kmp_task_t *
982__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
983 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
984 kmp_routine_entry_t task_entry )
985{
986 kmp_task_t *retval;
987 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
988
989 input_flags->native = FALSE;
990 // __kmp_task_alloc() sets up all other runtime flags
991
992 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
993 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
994 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
995 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
996
997 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
998 sizeof_shareds, task_entry );
999
1000 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1001
1002 return retval;
1003}
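//
// Illustrative call sequence for "#pragma omp task" (a hedged sketch; the real
// outlining, sizes and flag bits are emitted by the compiler, and the names
// below are assumptions):
//
//     kmp_int32   gtid = __kmpc_global_thread_num( &loc );
//     kmp_task_t *t    = __kmpc_omp_task_alloc( &loc, gtid, /* flags: tied */ 1,
//                                               sizeof_kmp_task_t, sizeof_shareds,
//                                               &outlined_task_entry );
//     /* ... copy firstprivate data and shared pointers into t / t->shareds ... */
//     __kmpc_omp_task( &loc, gtid, t );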
1004
1005//-----------------------------------------------------------
1006// __kmp_invoke_task: invoke the specified task
1007//
1008// gtid: global thread ID of caller
1009// task: the task to invoke
  1010// current_task: the task to resume after task invocation
1011
1012static void
1013__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1014{
1015 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
  1016#if OMP_40_ENABLED
1017 int discard = 0 /* false */;
1018#endif
  1019    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1020 gtid, taskdata, current_task) );
1021
1022 __kmp_task_start( gtid, task, current_task );
1023
  1024#if OMPT_SUPPORT
1025 ompt_thread_info_t oldInfo;
1026 kmp_info_t * thread;
1027 if (ompt_status & ompt_status_track) {
1028 // Store the threads states and restore them after the task
1029 thread = __kmp_threads[ gtid ];
1030 oldInfo = thread->th.ompt_thread_info;
1031 thread->th.ompt_thread_info.wait_id = 0;
1032 thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1033 taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1034 }
1035#endif
1036
  1037#if OMP_40_ENABLED
1038 // TODO: cancel tasks if the parallel region has also been cancelled
1039 // TODO: check if this sequence can be hoisted above __kmp_task_start
1040 // if cancellation has been enabled for this run ...
1041 if (__kmp_omp_cancellation) {
1042 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1043 kmp_team_t * this_team = this_thr->th.th_team;
1044 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1045 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1046 // this task belongs to a task group and we need to cancel it
1047 discard = 1 /* true */;
1048 }
1049 }
1050
  1051    //
  1052    // Invoke the task routine and pass in relevant data.
  1053    // Thunks generated by gcc take a different argument list.
  1054    //
  1055    if (!discard) {
  1056#endif // OMP_40_ENABLED
  1057#ifdef KMP_GOMP_COMPAT
  1058        if (taskdata->td_flags.native) {
  1059            ((void (*)(void *))(*(task->routine)))(task->shareds);
  1060        }
  1061        else
  1062#endif /* KMP_GOMP_COMPAT */
  1063        {
  1064            (*(task->routine))(gtid, task);
  1065        }
  1066#if OMP_40_ENABLED
  1067    }
  1068#endif // OMP_40_ENABLED
  1069
  1070
1071#if OMPT_SUPPORT
1072 if (ompt_status & ompt_status_track) {
1073 thread->th.ompt_thread_info = oldInfo;
1074 taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1075 }
1076#endif
1077
  1078    __kmp_task_finish( gtid, task, current_task );
1079
  1080    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1081 gtid, taskdata, current_task) );
1082 return;
1083}
1084
1085//-----------------------------------------------------------------------
1086// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1087//
1088// loc_ref: location of original task pragma (ignored)
1089// gtid: Global Thread ID of encountering thread
1090// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1091// Returns:
1092// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1093// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1094
1095kmp_int32
1096__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1097{
1098 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1099
1100 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1101 gtid, loc_ref, new_taskdata ) );
1102
1103 /* Should we execute the new task or queue it? For now, let's just always try to
1104 queue it. If the queue fills up, then we'll execute it. */
1105
1106 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1107 { // Execute this task immediately
1108 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1109 new_taskdata->td_flags.task_serial = 1;
1110 __kmp_invoke_task( gtid, new_task, current_task );
1111 }
1112
1113 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1114 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1115 new_taskdata ) );
1116
1117 return TASK_CURRENT_NOT_QUEUED;
1118}
1119
  1120//---------------------------------------------------------------------
1121// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1122// gtid: Global Thread ID of encountering thread
1123// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1124// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1125// returns:
1126//
1127// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1128// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1129kmp_int32
1130__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1131{
1132 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1133
  1134#if OMPT_SUPPORT
1135 if (ompt_status & ompt_status_track) {
1136 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1137 __builtin_frame_address(0);
1138 }
1139#endif
1140
  1141    /* Should we execute the new task or queue it? For now, let's just always try to
1142 queue it. If the queue fills up, then we'll execute it. */
1143
1144 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1145 { // Execute this task immediately
1146 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1147 if ( serialize_immediate )
1148 new_taskdata -> td_flags.task_serial = 1;
1149 __kmp_invoke_task( gtid, new_task, current_task );
1150 }
1151
  1152#if OMPT_SUPPORT
1153 if (ompt_status & ompt_status_track) {
1154 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1155 }
1156#endif
  1157
  1158    return TASK_CURRENT_NOT_QUEUED;
  1159}
  1160
1161//---------------------------------------------------------------------
  1162// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
  1163// the parent thread only!
  1164// loc_ref: location of original task pragma (ignored)
1165// gtid: Global Thread ID of encountering thread
1166// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1167// returns:
1168//
1169// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1170// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1171
1172kmp_int32
1173__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1174{
1175 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  1176    kmp_int32 res;
  1177
1178 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1179 gtid, loc_ref, new_taskdata ) );
1180
  1181    res = __kmp_omp_task(gtid,new_task,true);
  1182
1183 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1184 gtid, loc_ref, new_taskdata ) );
  1185    return res;
  1186}
  1187
  1188//-------------------------------------------------------------------------------------
1189// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1190
1191kmp_int32
1192__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1193{
1194 kmp_taskdata_t * taskdata;
1195 kmp_info_t * thread;
1196 int thread_finished = FALSE;
1197
1198 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1199 gtid, loc_ref) );
1200
1201 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1202 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1203
1204 thread = __kmp_threads[ gtid ];
1205 taskdata = thread -> th.th_current_task;
1206#if USE_ITT_BUILD
1207 // Note: These values are used by ITT events as well.
1208#endif /* USE_ITT_BUILD */
1209 taskdata->td_taskwait_counter += 1;
1210 taskdata->td_taskwait_ident = loc_ref;
1211 taskdata->td_taskwait_thread = gtid + 1;
1212
1213#if USE_ITT_BUILD
1214 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1215 if ( itt_sync_obj != NULL )
1216 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1217#endif /* USE_ITT_BUILD */
1218
1219 if ( ! taskdata->td_flags.team_serial ) {
1220 // GEH: if team serialized, avoid reading the volatile variable below.
  1221            kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
  1222            while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
  1223                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
  1224                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
  1225            }
1226 }
1227#if USE_ITT_BUILD
1228 if ( itt_sync_obj != NULL )
1229 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1230#endif /* USE_ITT_BUILD */
1231
1232 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1233 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1234 }
1235
1236 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1237 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1238
1239 return TASK_CURRENT_NOT_QUEUED;
1240}
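//
// Mapping sketch (illustrative): a user-level "#pragma omp taskwait" is lowered
// to a call of roughly this form:
//
//     __kmpc_omp_taskwait( &loc, __kmpc_global_thread_num( &loc ) );
//
// and the encountering thread executes queued child tasks via execute_tasks()
// instead of idling until td_incomplete_child_tasks reaches zero.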
1241
1242
1243//-------------------------------------------------
1244// __kmpc_omp_taskyield: switch to a different task
1245
1246kmp_int32
1247__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1248{
1249 kmp_taskdata_t * taskdata;
1250 kmp_info_t * thread;
1251 int thread_finished = FALSE;
1252
1253 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1254 gtid, loc_ref, end_part) );
1255
  1256    if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
  1257        // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1258
1259 thread = __kmp_threads[ gtid ];
1260 taskdata = thread -> th.th_current_task;
1261 // Should we model this as a task wait or not?
1262#if USE_ITT_BUILD
1263 // Note: These values are used by ITT events as well.
1264#endif /* USE_ITT_BUILD */
1265 taskdata->td_taskwait_counter += 1;
1266 taskdata->td_taskwait_ident = loc_ref;
1267 taskdata->td_taskwait_thread = gtid + 1;
1268
1269#if USE_ITT_BUILD
1270 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1271 if ( itt_sync_obj != NULL )
1272 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1273#endif /* USE_ITT_BUILD */
1274 if ( ! taskdata->td_flags.team_serial ) {
  1275            kmp_task_team_t * task_team = thread->th.th_task_team;
  1276            if (task_team != NULL) {
  1277                if (KMP_TASKING_ENABLED(task_team)) {
  1278                    __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1279 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1280 }
1281 }
  1282        }
  1283#if USE_ITT_BUILD
1284 if ( itt_sync_obj != NULL )
1285 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1286#endif /* USE_ITT_BUILD */
1287
1288 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1289 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1290 }
1291
1292 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1293 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1294
1295 return TASK_CURRENT_NOT_QUEUED;
1296}
1297
1298
1299#if OMP_40_ENABLED
1300//-------------------------------------------------------------------------------------
1301// __kmpc_taskgroup: Start a new taskgroup
1302
1303void
  1304__kmpc_taskgroup( ident_t* loc, int gtid )
  1305{
1306 kmp_info_t * thread = __kmp_threads[ gtid ];
1307 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1308 kmp_taskgroup_t * tg_new =
1309 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1310 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1311 tg_new->count = 0;
  1312    tg_new->cancel_request = cancel_noreq;
  1313    tg_new->parent = taskdata->td_taskgroup;
1314 taskdata->td_taskgroup = tg_new;
1315}
1316
1317
1318//-------------------------------------------------------------------------------------
1319// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1320// and its descendants are complete
1321
1322void
  1323__kmpc_end_taskgroup( ident_t* loc, int gtid )
  1324{
1325 kmp_info_t * thread = __kmp_threads[ gtid ];
1326 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1327 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1328 int thread_finished = FALSE;
1329
1330 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1331 KMP_DEBUG_ASSERT( taskgroup != NULL );
1332
1333 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1334#if USE_ITT_BUILD
1335 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1336 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1337 if ( itt_sync_obj != NULL )
1338 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1339#endif /* USE_ITT_BUILD */
1340
1341 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001342 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001343 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001344 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1345 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001346 }
1347 }
1348
1349#if USE_ITT_BUILD
1350 if ( itt_sync_obj != NULL )
1351 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1352#endif /* USE_ITT_BUILD */
1353 }
1354 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1355
1356 // Restore parent taskgroup for the current task
1357 taskdata->td_taskgroup = taskgroup->parent;
1358 __kmp_thread_free( thread, taskgroup );
1359
1360 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1361}
1362#endif
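/*
 * For reference: "#pragma omp taskgroup" is expected to be lowered into a bracketing
 * pair around the structured block, roughly as sketched below (exact codegen is
 * compiler-specific; "loc" and "gtid" are placeholders):
 *
 *     __kmpc_taskgroup( &loc, gtid );
 *     // ... structured block, possibly spawning tasks via __kmpc_omp_task() ...
 *     __kmpc_end_taskgroup( &loc, gtid );       // waits for all tasks created inside
 */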
1363
1364
1365//------------------------------------------------------
1366// __kmp_remove_my_task: remove a task from my own deque
1367
1368static kmp_task_t *
1369__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1370 kmp_int32 is_constrained )
1371{
1372 kmp_task_t * task;
1373 kmp_taskdata_t * taskdata;
1374 kmp_thread_data_t *thread_data;
1375 kmp_uint32 tail;
1376
1377 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1378 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1379
1380 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1381
1382 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1383 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1384 thread_data->td.td_deque_tail) );
1385
1386 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1387 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1388 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1389 thread_data->td.td_deque_tail) );
1390 return NULL;
1391 }
1392
1393 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1394
1395 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1396 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1397 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1398 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1399 thread_data->td.td_deque_tail) );
1400 return NULL;
1401 }
1402
1403 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1404 taskdata = thread_data -> td.td_deque[ tail ];
1405
1406 if (is_constrained) {
1407 // we need to check if the candidate obeys the task scheduling constraint:
1408 // only a child of the current task can be scheduled
1409 kmp_taskdata_t * current = thread->th.th_current_task;
1410 kmp_int32 level = current->td_level;
1411 kmp_taskdata_t * parent = taskdata->td_parent;
1412 while ( parent != current && parent->td_level > level ) {
1413 parent = parent->td_parent; // check generation up to the level of the current task
1414 KMP_DEBUG_ASSERT(parent != NULL);
1415 }
1416 if ( parent != current ) {
1417 // If the tail task is not a child, then no other children can appear in the deque.
1418 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1419 KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1420 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1421 thread_data->td.td_deque_tail) );
1422 return NULL;
1423 }
1424 }
1425
1426 thread_data -> td.td_deque_tail = tail;
1427 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1428
1429 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1430
1431 KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1432 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1433 thread_data->td.td_deque_tail) );
1434
1435 task = KMP_TASKDATA_TO_TASK( taskdata );
1436 return task;
1437}
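/*
 * Illustration of the deque index arithmetic used above (writing "deque", "head" and
 * "tail" for td.td_deque, td.td_deque_head and td.td_deque_tail; the push side lives
 * in __kmp_push_task() elsewhere in this file).  This assumes, as the mask arithmetic
 * requires, that TASK_DEQUE_SIZE is a power of two:
 *
 *     push  (owner):  deque[ tail ] = td;  tail = ( tail + 1 ) & TASK_DEQUE_MASK;
 *     pop   (owner):  tail = ( tail - 1 ) & TASK_DEQUE_MASK;  td = deque[ tail ];
 *     steal (thief):  td = deque[ head ];  head = ( head + 1 ) & TASK_DEQUE_MASK;
 *
 * so the owning thread works LIFO from the tail while thieves work FIFO from the head.
 */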
1438
1439
1440//-----------------------------------------------------------
1441// __kmp_steal_task: remove a task from another thread's deque
1442 // Assume that the calling thread has already checked that the
1443 // task_team thread_data exists before calling this routine.
1444
1445static kmp_task_t *
1446__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1447 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1448 kmp_int32 is_constrained )
1449{
1450 kmp_task_t * task;
1451 kmp_taskdata_t * taskdata;
1452 kmp_thread_data_t *victim_td, *threads_data;
1453 kmp_int32 victim_tid, thread_tid;
1454
1455 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1456
1457 threads_data = task_team -> tt.tt_threads_data;
1458 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1459
1460 victim_tid = victim->th.th_info.ds.ds_tid;
1461 victim_td = & threads_data[ victim_tid ];
1462
1463 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1464 "head=%u tail=%u\n",
1465 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1466 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1467
1468 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1469 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1470 {
1471 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1472 "ntasks=%d head=%u tail=%u\n",
1473 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1474 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1475 return NULL;
1476 }
1477
1478 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1479
1480 // Check again after we acquire the lock
1481 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1482 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1483 {
1484 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1485 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1486 "ntasks=%d head=%u tail=%u\n",
1487 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1488 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1489 return NULL;
1490 }
1491
1492 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1493
1494 if ( !is_constrained ) {
1495 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1496 // Bump head pointer and Wrap.
1497 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1498 } else {
1499 // While we have postponed tasks, let's steal from the tail of the deque (smaller tasks)
1500 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1501 taskdata = victim_td -> td.td_deque[ tail ];
1502 // we need to check if the candidate obeys the task scheduling constraint:
1503 // only a child of the current task can be scheduled
1504 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1505 kmp_int32 level = current->td_level;
1506 kmp_taskdata_t * parent = taskdata->td_parent;
1507 while ( parent != current && parent->td_level > level ) {
1508 parent = parent->td_parent; // check generation up to the level of the current task
1509 KMP_DEBUG_ASSERT(parent != NULL);
1510 }
1511 if ( parent != current ) {
1512 // If the tail task is not a child, then no other children can appear in the deque (?).
1513 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1514 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1515 "ntasks=%d head=%u tail=%u\n",
1516 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1517 task_team, victim_td->td.td_deque_ntasks,
1518 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1519 return NULL;
1520 }
1521 victim_td -> td.td_deque_tail = tail;
1522 }
1523 if (*thread_finished) {
1524 // We need to un-mark this victim as a finished victim. This must be done before
1525 // releasing the lock, or else other threads (starting with the master victim)
1526 // might be prematurely released from the barrier!!!
1527 kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1528
1529 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1530 gtid, count + 1, task_team) );
1531
1532 *thread_finished = FALSE;
1533 }
1534 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1535
1536 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1537
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001538 KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001539 "ntasks=%d head=%u tail=%u\n",
1540 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1541 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1542 victim_td->td.td_deque_tail) );
1543
1544 task = KMP_TASKDATA_TO_TASK( taskdata );
1545 return task;
1546}
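/*
 * The task scheduling constraint check that appears in both __kmp_remove_my_task()
 * and __kmp_steal_task() above is equivalent to the following hypothetical helper
 * (a sketch only; the library inlines the walk at both call sites):
 *
 *     static int __kmp_task_is_descendant_of( kmp_taskdata_t *td, kmp_taskdata_t *current )
 *     {
 *         kmp_taskdata_t *parent = td->td_parent;
 *         while ( parent != current && parent->td_level > current->td_level ) {
 *             parent = parent->td_parent;    // walk up to the level of the current task
 *         }
 *         return parent == current;          // TRUE iff td descends from current
 *     }
 */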
1547
1548
1549//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001550// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001551 // is satisfied (return true) or there are none left (return false).
1552// final_spin is TRUE if this is the spin at the release barrier.
1553// thread_finished indicates whether the thread is finished executing all
1554// the tasks it has on its deque, and is at the release barrier.
1555// spinner is the location on which to spin.
1556// spinner == NULL means only execute a single task and return.
1557// checker is the value to check to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001558template <class C>
1559static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1560 int *thread_finished
1561 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001562{
1563 kmp_task_team_t * task_team;
1564 kmp_team_t * team;
1565 kmp_thread_data_t * threads_data;
1566 kmp_task_t * task;
1567 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1568 volatile kmp_uint32 * unfinished_threads;
1569 kmp_int32 nthreads, last_stolen, k, tid;
1570
1571 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1572 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1573
1574 task_team = thread -> th.th_task_team;
1575 KMP_DEBUG_ASSERT( task_team != NULL );
1576
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001577 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001578 gtid, final_spin, *thread_finished) );
1579
1580 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1581 KMP_DEBUG_ASSERT( threads_data != NULL );
1582
1583 nthreads = task_team -> tt.tt_nproc;
1584 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1585 KMP_DEBUG_ASSERT( nthreads > 1 );
1586 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1587
1588 // Choose tasks from our own work queue.
1589 start:
1590 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1591#if USE_ITT_BUILD && USE_ITT_NOTIFY
1592 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1593 if ( itt_sync_obj == NULL ) {
1594 // we are at fork barrier where we could not get the object reliably
1595 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1596 }
1597 __kmp_itt_task_starting( itt_sync_obj );
1598 }
1599#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1600 __kmp_invoke_task( gtid, task, current_task );
1601#if USE_ITT_BUILD
1602 if ( itt_sync_obj != NULL )
1603 __kmp_itt_task_finished( itt_sync_obj );
1604#endif /* USE_ITT_BUILD */
1605
1606 // If this thread is only partway through the barrier and the condition
1607 // is met, then return now, so that the barrier gather/release pattern can proceed.
1608 // If this thread is in the last spin loop in the barrier, waiting to be
1609 // released, we know that the termination condition will not be satisfied,
1610 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001611 if (flag == NULL || (!final_spin && flag->done_check())) {
1612 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001613 return TRUE;
1614 }
1615 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1616 }
1617
1618 // This thread's work queue is empty. If we are in the final spin loop
1619 // of the barrier, check and see if the termination condition is satisfied.
1620 if (final_spin) {
1621 // First, decrement the #unfinished threads, if that has not already
1622 // been done. This decrement might be to the spin location, and
1623 // result in the termination condition being satisfied.
1624 if (! *thread_finished) {
1625 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001626 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001627 gtid, count, task_team) );
1628 *thread_finished = TRUE;
1629 }
1630
1631 // It is now unsafe to reference thread->th.th_team !!!
1632 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1633 // thread to pass through the barrier, where it might reset each thread's
1634 // th.th_team field for the next parallel region.
1635 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001636 if (flag != NULL && flag->done_check()) {
1637 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638 return TRUE;
1639 }
1640 }
1641
1642 // Try to steal from the last place I stole from successfully.
1643 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1644 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1645
1646 if (last_stolen != -1) {
1647 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1648
1649 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1650 thread_finished, is_constrained )) != NULL)
1651 {
1652#if USE_ITT_BUILD && USE_ITT_NOTIFY
1653 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1654 if ( itt_sync_obj == NULL ) {
1655 // we are at fork barrier where we could not get the object reliably
1656 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1657 }
1658 __kmp_itt_task_starting( itt_sync_obj );
1659 }
1660#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1661 __kmp_invoke_task( gtid, task, current_task );
1662#if USE_ITT_BUILD
1663 if ( itt_sync_obj != NULL )
1664 __kmp_itt_task_finished( itt_sync_obj );
1665#endif /* USE_ITT_BUILD */
1666
1667 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001668 if (flag == NULL || (!final_spin && flag->done_check())) {
1669 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670 gtid) );
1671 return TRUE;
1672 }
1673
1674 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1675 // If the execution of the stolen task resulted in more tasks being
1676 // placed on our run queue, then restart the whole process.
1677 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001678 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001679 gtid) );
1680 goto start;
1681 }
1682 }
1683
1684 // Don't give priority to stealing from this thread anymore.
1685 threads_data[ tid ].td.td_deque_last_stolen = -1;
1686
1687 // The victim's work queue is empty. If we are in the final spin loop
1688 // of the barrier, check and see if the termination condition is satisfied.
1689 if (final_spin) {
1690 // First, decrement the #unfinished threads, if that has not already
1691 // been done. This decrement might be to the spin location, and
1692 // result in the termination condition being satisfied.
1693 if (! *thread_finished) {
1694 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001695 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001696 "task_team=%p\n", gtid, count, task_team) );
1697 *thread_finished = TRUE;
1698 }
1699
1700 // If __kmp_tasking_mode != tskm_immediate_exec
1701 // then it is now unsafe to reference thread->th.th_team !!!
1702 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1703 // thread to pass through the barrier, where it might reset each thread's
1704 // th.th_team field for the next parallel region.
1705 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001706 if (flag != NULL && flag->done_check()) {
1707 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001708 gtid) );
1709 return TRUE;
1710 }
1711 }
1712 }
1713
1714 // Find a different thread to steal work from. Pick a random thread.
1715 // My initial plan was to cycle through all the threads, and only return
1716 // if we tried to steal from every thread, and failed. Arch says that's
1717 // not such a great idea.
1718 // GEH - need yield code in this loop for throughput library mode?
1719 new_victim:
1720 k = __kmp_get_random( thread ) % (nthreads - 1);
1721 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1722 ++k; // Adjusts random distribution to exclude self
1723 }
1724 {
1725 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1726 int first;
1727
1728 // There is a slight chance that __kmp_enable_tasking() did not wake up
1729 // all threads waiting at the barrier. If this thread is sleeping, then
1730 // wake it up. Since we were going to pay the cache miss penalty
1731 // for referencing another thread's kmp_info_t struct anyway, the check
1732 // shouldn't cost too much performance at this point.
1733 // In extra barrier mode, tasks do not sleep at the separate tasking
1734 // barrier, so this isn't a problem.
1735 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1736 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1737 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1738 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001739 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001740 // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001741 // There is a slight possibility that it resumes, steals a task from
Jim Cownie5e8470a2013-09-27 10:38:44 +00001742 // another thread, which spawns more tasks, all in the time that it takes
1743 // this thread to check => don't write an assertion that the victim's
1744 // queue is empty. Try stealing from a different thread.
1745 goto new_victim;
1746 }
1747
1748 // Now try to steal work from the selected thread
1749 first = TRUE;
1750 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1751 thread_finished, is_constrained )) != NULL)
1752 {
1753#if USE_ITT_BUILD && USE_ITT_NOTIFY
1754 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1755 if ( itt_sync_obj == NULL ) {
1756 // we are at fork barrier where we could not get the object reliably
1757 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1758 }
1759 __kmp_itt_task_starting( itt_sync_obj );
1760 }
1761#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1762 __kmp_invoke_task( gtid, task, current_task );
1763#if USE_ITT_BUILD
1764 if ( itt_sync_obj != NULL )
1765 __kmp_itt_task_finished( itt_sync_obj );
1766#endif /* USE_ITT_BUILD */
1767
1768 // Try stealing from this victim again, in the future.
1769 if (first) {
1770 threads_data[ tid ].td.td_deque_last_stolen = k;
1771 first = FALSE;
1772 }
1773
1774 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001775 if (flag == NULL || (!final_spin && flag->done_check())) {
1776 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777 gtid) );
1778 return TRUE;
1779 }
1780 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1781
1782 // If the execution of the stolen task resulted in more tasks being
1783 // placed on our run queue, then restart the whole process.
1784 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001785 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001786 gtid) );
1787 goto start;
1788 }
1789 }
1790
1791 // The victim's work queue is empty. If we are in the final spin loop
1792 // of the barrier, check and see if the termination condition is satisfied.
1793 // Going on and finding a new victim to steal from is expensive, as it
1794 // involves a lot of cache misses, so we definitely want to re-check the
1795 // termination condition before doing that.
1796 if (final_spin) {
1797 // First, decrement the #unfinished threads, if that has not already
1798 // been done. This decrement might be to the spin location, and
1799 // result in the termination condition being satisfied.
1800 if (! *thread_finished) {
1801 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001802 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001803 "task_team=%p\n",
1804 gtid, count, task_team) );
1805 *thread_finished = TRUE;
1806 }
1807
1808 // If __kmp_tasking_mode != tskm_immediate_exec,
1809 // then it is now unsafe to reference thread->th.th_team !!!
1810 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1811 // thread to pass through the barrier, where it might reset each thread's
1812 // th.th_team field for the next parallel region.
1813 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001814 if (flag != NULL && flag->done_check()) {
1815 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816 return TRUE;
1817 }
1818 }
1819 }
1820
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001821 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001822 return FALSE;
1823}
1824
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001825int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1826 int *thread_finished
1827 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1828{
1829 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1830 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1831}
1832
1833int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1834 int *thread_finished
1835 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1836{
1837 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1838 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1839}
1840
1841int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1842 int *thread_finished
1843 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1844{
1845 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1846 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1847}
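/*
 * Usage sketch for the wrappers above, mirroring __kmpc_end_taskgroup() earlier in
 * this file ("counter", "thread", "gtid", "thread_finished" and "itt_sync_obj" are
 * placeholders): a waiter spinning on a 32-bit counter helps execute tasks until the
 * counter reaches the check value given to the flag.
 *
 *     kmp_flag_32 flag( &counter, 0U );                  // done when counter == 0
 *     while ( TCR_4(counter) != 0 ) {
 *         flag.execute_tasks( thread, gtid, FALSE, &thread_finished
 *                             USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
 *     }
 */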
1848
1849
Jim Cownie5e8470a2013-09-27 10:38:44 +00001850
1851//-----------------------------------------------------------------------------
1852// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1853// next barrier so they can assist in executing enqueued tasks.
1854 // The first thread in allocates the task team atomically.
1855
1856static void
1857__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1858{
1859 kmp_team_t *team = this_thr->th.th_team;
1860 kmp_thread_data_t *threads_data;
1861 int nthreads, i, is_init_thread;
1862
1863 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1864 __kmp_gtid_from_thread( this_thr ) ) );
1865
1866 KMP_DEBUG_ASSERT(task_team != NULL);
1867 KMP_DEBUG_ASSERT(team != NULL);
1868
1869 nthreads = task_team->tt.tt_nproc;
1870 KMP_DEBUG_ASSERT(nthreads > 0);
1871 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1872
1873 // Allocate or increase the size of threads_data if necessary
1874 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1875
1876 if (!is_init_thread) {
1877 // Some other thread already set up the array.
1878 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1879 __kmp_gtid_from_thread( this_thr ) ) );
1880 return;
1881 }
1882 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1883 KMP_DEBUG_ASSERT( threads_data != NULL );
1884
1885 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1886 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1887 {
1888 // Release any threads sleeping at the barrier, so that they can steal
1889 // tasks and execute them. In extra barrier mode, tasks do not sleep
1890 // at the separate tasking barrier, so this isn't a problem.
1891 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001892 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001893 kmp_info_t *thread = threads_data[i].td.td_thr;
1894
1895 if (i == this_thr->th.th_info.ds.ds_tid) {
1896 continue;
1897 }
1898 // Since we haven't locked the thread's suspend mutex lock at this
1899 // point, there is a small window where a thread might be putting
1900 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001901 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00001902 // to see if other threads are sleeping (using the same random
1903 // mechanism that is used for task stealing) and awakens them if
1904 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001905 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001906 {
1907 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1908 __kmp_gtid_from_thread( this_thr ),
1909 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001910 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001911 }
1912 else {
1913 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1914 __kmp_gtid_from_thread( this_thr ),
1915 __kmp_gtid_from_thread( thread ) ) );
1916 }
1917 }
1918 }
1919
1920 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1921 __kmp_gtid_from_thread( this_thr ) ) );
1922}
1923
1924
1925/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001926/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00001927 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
1928 * like a shadow of the kmp_team_t data struct, with a different lifetime.
1929 * After a child thread checks into a barrier and calls __kmp_release() from
1930 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1931 * longer assume that the kmp_team_t structure is intact (at any moment, the
1932 * master thread may exit the barrier code and free the team data structure,
1933 * and return the threads to the thread pool).
1934 *
1935 * This does not work with the tasking code, as the thread is still
1936 * expected to participate in the execution of any tasks that may have been
1937 * spawned by a member of the team, and the thread still needs access
1938 * to each thread in the team, so that it can steal work from it.
1939 *
1940 * Enter the existence of the kmp_task_team_t struct. It employs a reference
1941 * counting mechanism, and is allocated by the master thread before calling
1942 * __kmp_<barrier_kind>_release, and then is released by the last thread to
1943 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1944 * of the kmp_task_team_t structs for consecutive barriers can overlap
1945 * (and will, unless the master thread is the last thread to exit the barrier
1946 * release phase, which is not typical).
1947 *
1948 * The existence of such a struct is useful outside the context of tasking,
1949 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1950 * so that any performance differences show up when comparing the 2.5 vs. 3.0
1951 * libraries.
1952 *
1953 * We currently use the existence of the threads array as an indicator that
1954 * tasks were spawned since the last barrier. If the structure is to be
1955 * useful outside the context of tasking, then this will have to change, but
1956 * not setting the field minimizes the performance impact of tasking on
1957 * barriers, when no explicit tasks were spawned (pushed, actually).
1958 */
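/*
 * Rough per-barrier ordering of the task team routines defined below, as driven by
 * the barrier code elsewhere in the runtime (a sketch only; see the actual call
 * sites for the precise conditions):
 *
 *     master thread:  __kmp_task_team_setup( this_thr, team, ... );  // before the release phase
 *     every thread:   __kmp_task_team_sync( this_thr, team );        // after the release phase
 *     master thread:  __kmp_task_team_wait( this_thr, team, ... );   // during the next gather phase
 *     worker threads: __kmp_unref_task_team( task_team, thread );    // last reference frees the struct
 */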
1959
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001960
Jim Cownie5e8470a2013-09-27 10:38:44 +00001961static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1962// Lock for task team data structures
1963static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1964
1965
1966//------------------------------------------------------------------------------
1967// __kmp_alloc_task_deque:
1968 // Allocates a task deque for a particular thread, and initializes the necessary
1969// data structures relating to the deque. This only happens once per thread
1970// per task team since task teams are recycled.
1971// No lock is needed during allocation since each thread allocates its own
1972// deque.
1973
1974static void
1975__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1976{
1977 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1978 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1979
1980 // Initialize last stolen task field to "none"
1981 thread_data -> td.td_deque_last_stolen = -1;
1982
1983 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1984 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1985 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1986
1987 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1988 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1989 // Allocate space for task deque, and zero the deque
1990 // Cannot use __kmp_thread_calloc() because threads not around for
1991 // kmp_reap_task_team( ).
1992 thread_data -> td.td_deque = (kmp_taskdata_t **)
1993 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1994}
1995
1996
1997//------------------------------------------------------------------------------
1998// __kmp_free_task_deque:
1999// Deallocates a task deque for a particular thread.
2000 // Happens at library deallocation, so we don't need to reset all thread data fields.
2001
2002static void
2003__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2004{
2005 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2006
2007 if ( thread_data -> td.td_deque != NULL ) {
2008 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2009 __kmp_free( thread_data -> td.td_deque );
2010 thread_data -> td.td_deque = NULL;
2011 }
2012 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2013
2014#ifdef BUILD_TIED_TASK_STACK
2015 // GEH: Figure out what to do here for td_susp_tied_tasks
2016 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2017 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2018 }
2019#endif // BUILD_TIED_TASK_STACK
2020}
2021
2022
2023//------------------------------------------------------------------------------
2024// __kmp_realloc_task_threads_data:
2025// Allocates a threads_data array for a task team, either by allocating an initial
2026// array or enlarging an existing array. Only the first thread to get the lock
2027 // allocates or enlarges the array and re-initializes the array elements.
2028// That thread returns "TRUE", the rest return "FALSE".
2029// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2030// The current size is given by task_team -> tt.tt_max_threads.
2031
2032static int
2033__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2034{
2035 kmp_thread_data_t ** threads_data_p;
2036 kmp_int32 nthreads, maxthreads;
2037 int is_init_thread = FALSE;
2038
2039 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2040 // Already reallocated and initialized.
2041 return FALSE;
2042 }
2043
2044 threads_data_p = & task_team -> tt.tt_threads_data;
2045 nthreads = task_team -> tt.tt_nproc;
2046 maxthreads = task_team -> tt.tt_max_threads;
2047
2048 // All threads must lock when they encounter the first task of the implicit task
2049 // region to make sure threads_data fields are (re)initialized before they are used.
2050 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2051
2052 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2053 // first thread to enable tasking
2054 kmp_team_t *team = thread -> th.th_team;
2055 int i;
2056
2057 is_init_thread = TRUE;
2058 if ( maxthreads < nthreads ) {
2059
2060 if ( *threads_data_p != NULL ) {
2061 kmp_thread_data_t *old_data = *threads_data_p;
2062 kmp_thread_data_t *new_data = NULL;
2063
2064 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2065 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2066 __kmp_gtid_from_thread( thread ), task_team,
2067 nthreads, maxthreads ) );
2068 // Reallocate threads_data to have more elements than current array
2069 // Cannot use __kmp_thread_realloc() because threads not around for
2070 // kmp_reap_task_team( ). Note all new array entries are initialized
2071 // to zero by __kmp_allocate().
2072 new_data = (kmp_thread_data_t *)
2073 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2074 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002075 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
2076 (void *) old_data,
2077 maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078
2079#ifdef BUILD_TIED_TASK_STACK
2080 // GEH: Figure out if this is the right thing to do
2081 for (i = maxthreads; i < nthreads; i++) {
2082 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2083 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2084 }
2085#endif // BUILD_TIED_TASK_STACK
2086 // Install the new data and free the old data
2087 (*threads_data_p) = new_data;
2088 __kmp_free( old_data );
2089 }
2090 else {
2091 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2092 "threads data for task_team %p, size = %d\n",
2093 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2094 // Make the initial allocate for threads_data array, and zero entries
2095 // Cannot use __kmp_thread_calloc() because threads not around for
2096 // kmp_reap_task_team( ).
2097 *threads_data_p = (kmp_thread_data_t *)
2098 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2099#ifdef BUILD_TIED_TASK_STACK
2100 // GEH: Figure out if this is the right thing to do
2101 for (i = 0; i < nthreads; i++) {
2102 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2103 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2104 }
2105#endif // BUILD_TIED_TASK_STACK
2106 }
2107 task_team -> tt.tt_max_threads = nthreads;
2108 }
2109 else {
2110 // If array has (more than) enough elements, go ahead and use it
2111 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2112 }
2113
2114 // initialize threads_data pointers back to thread_info structures
2115 for (i = 0; i < nthreads; i++) {
2116 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2117 thread_data -> td.td_thr = team -> t.t_threads[i];
2118
2119 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2120 // The last stolen field survives across teams / barrier, and the number
2121 // of threads may have changed. It's possible (likely?) that a new
2122 // parallel region will exhibit the same behavior as the previous region.
2123 thread_data -> td.td_deque_last_stolen = -1;
2124 }
2125 }
2126
2127 KMP_MB();
2128 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2129 }
2130
2131 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2132 return is_init_thread;
2133}
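/*
 * The routine above follows a double-checked initialization pattern; in outline (a
 * condensed restatement of the code above, not additional logic):
 *
 *     if ( TCR_4(task_team->tt.tt_found_tasks) ) return FALSE;    // fast path, no lock taken
 *     __kmp_acquire_bootstrap_lock( &task_team->tt.tt_threads_lock );
 *     if ( !TCR_4(task_team->tt.tt_found_tasks) ) {               // only the first thread gets here
 *         // ... allocate or enlarge threads_data and (re)initialize its entries ...
 *         KMP_MB();                                               // publish the array before the flag
 *         TCW_SYNC_4( task_team->tt.tt_found_tasks, TRUE );
 *     }
 *     __kmp_release_bootstrap_lock( &task_team->tt.tt_threads_lock );
 */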
2134
2135
2136//------------------------------------------------------------------------------
2137// __kmp_free_task_threads_data:
2138// Deallocates a threads_data array for a task team, including any attached
2139// tasking deques. Only occurs at library shutdown.
2140
2141static void
2142__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2143{
2144 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2145 if ( task_team -> tt.tt_threads_data != NULL ) {
2146 int i;
2147 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2148 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2149 }
2150 __kmp_free( task_team -> tt.tt_threads_data );
2151 task_team -> tt.tt_threads_data = NULL;
2152 }
2153 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2154}
2155
2156
2157//------------------------------------------------------------------------------
2158// __kmp_allocate_task_team:
2159// Allocates a task team associated with a specific team, taking it from
2160// the global task team free list if possible. Also initializes data structures.
2161
2162static kmp_task_team_t *
2163__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2164{
2165 kmp_task_team_t *task_team = NULL;
2166 int nthreads;
2167
2168 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2169 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2170
2171 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2172 // Take a task team from the task team pool
2173 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2174 if (__kmp_free_task_teams != NULL) {
2175 task_team = __kmp_free_task_teams;
2176 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2177 task_team -> tt.tt_next = NULL;
2178 }
2179 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2180 }
2181
2182 if (task_team == NULL) {
2183 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2184 "task team for team %p\n",
2185 __kmp_gtid_from_thread( thread ), team ) );
2186 // Allocate a new task team if one is not available.
2187 // Cannot use __kmp_thread_malloc() because threads not around for
2188 // kmp_reap_task_team( ).
2189 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2190 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2191 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2192 //task_team -> tt.tt_max_threads = 0;
2193 //task_team -> tt.tt_next = NULL;
2194 }
2195
2196 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2197 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2198
Jim Cownie5e8470a2013-09-27 10:38:44 +00002199 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2200 TCW_4( task_team -> tt.tt_active, TRUE );
2201 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2202
2203 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2204 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2205 return task_team;
2206}
2207
2208
2209//------------------------------------------------------------------------------
2210// __kmp_free_task_team:
2211// Frees the task team associated with a specific thread, and adds it
2212// to the global task team free list.
2213//
2214
2215static void
2216__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2217{
2218 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2219 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2220
2221 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2222
2223 // Put task team back on free list
2224 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2225
2226 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2227 task_team -> tt.tt_next = __kmp_free_task_teams;
2228 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2229 TCW_PTR(__kmp_free_task_teams, task_team);
2230
2231 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2232}
2233
2234
2235//------------------------------------------------------------------------------
2236// __kmp_reap_task_teams:
2237// Free all the task teams on the task team free list.
2238// Should only be done during library shutdown.
2239// Cannot do anything that needs a thread structure or gtid since they are already gone.
2240
2241void
2242__kmp_reap_task_teams( void )
2243{
2244 kmp_task_team_t *task_team;
2245
2246 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2247 // Free all task_teams on the free list
2248 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2249 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2250 __kmp_free_task_teams = task_team -> tt.tt_next;
2251 task_team -> tt.tt_next = NULL;
2252
2253 // Free threads_data if necessary
2254 if ( task_team -> tt.tt_threads_data != NULL ) {
2255 __kmp_free_task_threads_data( task_team );
2256 }
2257 __kmp_free( task_team );
2258 }
2259 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2260 }
2261}
2262
2263
2264//------------------------------------------------------------------------------
2265 // __kmp_unref_task_team:
2266 // Remove one thread's reference to the task team structure by
2267 // decreasing the reference count, and deallocate the task team if there are
2268 // no more references to it.
2269//
2270void
2271__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2272{
2273 kmp_uint ref_ct;
2274
2275 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2276
2277 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2278 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2279
2280
2281 if ( ref_ct == 0 ) {
2282 __kmp_free_task_team( thread, task_team );
2283 }
2284
2285 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2286}
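/*
 * Worked example (illustration only): for a team of 4 threads,
 * __kmp_allocate_task_team() sets tt_ref_ct to 3, since the master thread is not
 * counted.  Each worker's call to __kmp_unref_task_team() decrements the count
 * (3 -> 2 -> 1 -> 0), and the worker that observes ref_ct == 0 frees the task team.
 */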
2287
2288
2289//------------------------------------------------------------------------------
2290// __kmp_wait_to_unref_task_teams:
2291// Some threads could still be in the fork barrier release code, possibly
2292// trying to steal tasks. Wait for each thread to unreference its task team.
2293//
2294void
2295__kmp_wait_to_unref_task_teams(void)
2296{
2297 kmp_info_t *thread;
2298 kmp_uint32 spins;
2299 int done;
2300
2301 KMP_INIT_YIELD( spins );
2302
2303
2304 for (;;) {
2305 done = TRUE;
2306
2307 // TODO: GEH - this may be wrong because some sync would be necessary
2308 // in case threads are added to the pool during the traversal.
2309 // Need to verify that the lock for the thread pool is held when calling
2310 // this routine.
2311 for (thread = (kmp_info_t *)__kmp_thread_pool;
2312 thread != NULL;
2313 thread = thread->th.th_next_pool)
2314 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002315#if KMP_OS_WINDOWS
2316 DWORD exit_val;
2317#endif
2318 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2319 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2320 __kmp_gtid_from_thread( thread ) ) );
2321 continue;
2322 }
2323#if KMP_OS_WINDOWS
2324 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2325 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2326 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2327 __kmp_unref_task_team( thread->th.th_task_team, thread );
2328 }
2329 continue;
2330 }
2331#endif
2332
2333 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2334
2335 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2336 __kmp_gtid_from_thread( thread ) ) );
2337
2338 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002339 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002340 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002341 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002342 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2343 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002344 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002345 }
2346 }
2347 }
2348 if (done) {
2349 break;
2350 }
2351
2352 // If we are oversubscribed,
2353 // or have waited a bit (and library mode is throughput), yield.
2354 // Pause is in the following code.
2355 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2356 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2357 }
2358
2359
2360}
2361
2362
2363//------------------------------------------------------------------------------
2364// __kmp_task_team_setup: Create a task_team for the current team, but use
2365// an already created, unused one if it already exists.
2366// This may be called by any thread, but only for teams with # threads >1.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002368__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002369{
2370 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2371
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002372 if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( team->t.t_nproc > 1 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002373 // Allocate a new task team, which will be propagated to
2374 // all of the worker threads after the barrier. As they
2375 // spin in the barrier release phase, then will continue
2376 // to use the previous task team struct, until they receive
2377 // the signal to stop checking for tasks (they can't safely
2378 // reference the kmp_team_t struct, which could be reallocated
2379 // by the master thread).
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002380 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2381 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2382 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie5e8470a2013-09-27 10:38:44 +00002383 ((team != NULL) ? team->t.t_id : -1)) );
2384 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002385 //else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002386 // All threads have reported in, and no tasks were spawned
2387 // for this release->gather region. Leave the old task
2388 // team struct in place for the upcoming region. No task
2389 // teams are formed for serialized teams.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002390 if (both) {
2391 int other_team = 1 - this_thr->th.th_task_state;
2392 if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2393 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2394 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2395 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2396 ((team != NULL) ? team->t.t_id : -1)) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002397 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002398 }
2399}
2400
2401
2402//------------------------------------------------------------------------------
2403// __kmp_task_team_sync: Propagation of task team data from team to threads
2404// which happens just after the release phase of a team barrier. This may be
2405// called by any thread, but only for teams with # threads > 1.
2406
2407void
2408__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2409{
2410 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2411
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002412 // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002413 if ( this_thr->th.th_task_team != NULL ) {
2414 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2415 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2416 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002417 } else { // We are re-using a task team that was never enabled.
2418 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002419 }
2420 }
2421
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002422 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002424 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2425 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002426 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2427 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2428 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2429}
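/*
 * Sketch of the effect of the th_task_state toggle above: the team keeps two task
 * team slots, t_task_team[0] and t_task_team[1], and each thread alternates between
 * them from one barrier to the next, e.g.
 *
 *     barrier N   : th_task_state == 0  ->  thread uses team->t.t_task_team[0]
 *     barrier N+1 : th_task_state == 1  ->  thread uses team->t.t_task_team[1]
 *     barrier N+2 : th_task_state == 0  ->  t_task_team[0] again (recycled by setup)
 *
 * This lets the task team for the next region be installed while straggling threads
 * may still be referencing the previous one.
 */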
2430
2431
2432//------------------------------------------------------------------------------
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002433// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
2434// barrier gather phase. Only called by master thread if #threads in team > 1 !
Jim Cownie5e8470a2013-09-27 10:38:44 +00002435void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002436__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002437 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002438 )
2439{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002440 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002441
2442 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2443 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2444
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002445 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002446 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2447 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002448 // All worker threads might have dropped through to the release phase, but could still
2449 // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2450 // only the master thread checks for the termination condition.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002451 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2452 flag.wait(this_thr, TRUE
2453 USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002454
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002455 // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2456 // They will deallocate it when the reference count reaches zero.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002457 // The master thread is not included in the ref count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002458 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2459 __kmp_gtid_from_thread( this_thr ), task_team ) );
2460 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2461 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2462 KMP_MB();
2463
2464 TCW_PTR(this_thr->th.th_task_team, NULL);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002465 team->t.t_task_team[this_thr->th.th_task_state] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002466 }
2467}
2468
2469
2470//------------------------------------------------------------------------------
2471// __kmp_tasking_barrier:
2472// Internal function to execute all tasks prior to a regular barrier or a
2473// join barrier. It is a full barrier itself, which unfortunately turns
2474// regular barriers into double barriers and join barriers into 1 1/2
2475// barriers.
2476 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2477
2478void
2479__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2480{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002481 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002482 int flag = FALSE;
2483 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2484
2485#if USE_ITT_BUILD
2486 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2487#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002488 kmp_flag_32 spin_flag(spin, 0U);
2489 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2490 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491#if USE_ITT_BUILD
2492 // TODO: What about itt_sync_obj??
2493 KMP_FSYNC_SPIN_PREPARE( spin );
2494#endif /* USE_ITT_BUILD */
2495
2496 if( TCR_4(__kmp_global.g.g_done) ) {
2497 if( __kmp_global.g.g_abort )
2498 __kmp_abort_thread( );
2499 break;
2500 }
2501 KMP_YIELD( TRUE ); // GH: We always yield here
2502 }
2503#if USE_ITT_BUILD
2504 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2505#endif /* USE_ITT_BUILD */
2506}
2507