1/*
2 * kmp_tasking.c -- OpenMP 3.0 tasking support.
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_wait_release.h"
20
21
22
23/* ------------------------------------------------------------------------ */
24/* ------------------------------------------------------------------------ */
25
26
27/* forward declaration */
28static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
29static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
30static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
31
32static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
33 switch (((kmp_flag_64 *)flag)->get_type()) {
34 case flag32: __kmp_resume_32(gtid, NULL); break;
35 case flag64: __kmp_resume_64(gtid, NULL); break;
36 case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
37 }
38}
39
40#ifdef BUILD_TIED_TASK_STACK
41
42//---------------------------------------------------------------------------
43// __kmp_trace_task_stack: print the tied tasks from the task stack in order
44// from top to bottom
45//
46// gtid: global thread identifier for thread containing stack
47// thread_data: thread data for task team thread containing stack
48// threshold: value above which the trace statement triggers
49// location: string identifying call site of this function (for trace)
50
51static void
52__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
53{
54 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
55 kmp_taskdata_t **stack_top = task_stack -> ts_top;
56 kmp_int32 entries = task_stack -> ts_entries;
57 kmp_taskdata_t *tied_task;
58
59 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
60 "first_block = %p, stack_top = %p \n",
61 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
62
63 KMP_DEBUG_ASSERT( stack_top != NULL );
64 KMP_DEBUG_ASSERT( entries > 0 );
65
66 while ( entries != 0 )
67 {
68 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
69 // fix up ts_top if we need to pop from previous block
70 if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
71 {
72 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
73
74 stack_block = stack_block -> sb_prev;
75 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
76 }
77
78 // finish bookkeeping
79 stack_top--;
80 entries--;
81
82 tied_task = * stack_top;
83
84 KMP_DEBUG_ASSERT( tied_task != NULL );
85 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
86
87 KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
88 "stack_top=%p, tied_task=%p\n",
89 location, gtid, entries, stack_top, tied_task ) );
90 }
91 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
92
93 KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
94 location, gtid ) );
95}
96
97//---------------------------------------------------------------------------
98// __kmp_init_task_stack: initialize the task stack for the first time
99// after a thread_data structure is created.
100// It should not be necessary to do this again (assuming the stack works).
101//
102// gtid: global thread identifier of calling thread
103// thread_data: thread data for task team thread containing stack
104
105static void
106__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
107{
108 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
109 kmp_stack_block_t *first_block;
110
111 // set up the first block of the stack
112 first_block = & task_stack -> ts_first_block;
113 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
114 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
115
116 // initialize the stack to be empty
117 task_stack -> ts_entries = TASK_STACK_EMPTY;
118 first_block -> sb_next = NULL;
119 first_block -> sb_prev = NULL;
120}
121
122
123//---------------------------------------------------------------------------
124// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
125//
126// gtid: global thread identifier for calling thread
127// thread_data: thread info for thread containing stack
128
129static void
130__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
131{
132 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
133 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
134
135 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
136 // free from the second block of the stack
137 while ( stack_block != NULL ) {
138 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
139
140 stack_block -> sb_next = NULL;
141 stack_block -> sb_prev = NULL;
142 if (stack_block != & task_stack -> ts_first_block) {
143 __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
144 }
145 stack_block = next_block;
146 }
147 // initialize the stack to be empty
148 task_stack -> ts_entries = 0;
149 task_stack -> ts_top = NULL;
150}
151
152
153//---------------------------------------------------------------------------
154// __kmp_push_task_stack: Push the tied task onto the task stack.
155// Grow the stack if necessary by allocating another block.
156//
157// gtid: global thread identifier for calling thread
158// thread: thread info for thread containing stack
159// tied_task: the task to push on the stack
160
161static void
162__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
163{
164 // GEH - need to consider what to do if tt_threads_data not allocated yet
165 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
166 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
167 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
168
169 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
170 return; // Don't push anything on stack if team or team tasks are serialized
171 }
172
173 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
174 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
175
176 KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
177 gtid, thread, tied_task ) );
178 // Store entry
179 * (task_stack -> ts_top) = tied_task;
180
181 // Do bookkeeping for next push
182 task_stack -> ts_top++;
183 task_stack -> ts_entries++;
184
185 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
186 {
187 // Find beginning of this task block
188 kmp_stack_block_t *stack_block =
189 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
190
191 // Check if we already have a block
192 if ( stack_block -> sb_next != NULL )
193 { // reset ts_top to beginning of next block
194 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
195 }
196 else
197 { // Alloc new block and link it up
198 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
199 __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
200
201 task_stack -> ts_top = & new_block -> sb_block[0];
202 stack_block -> sb_next = new_block;
203 new_block -> sb_prev = stack_block;
204 new_block -> sb_next = NULL;
205
206 KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
207 gtid, tied_task, new_block ) );
208 }
209 }
210 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
211}
212
213//---------------------------------------------------------------------------
214// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
215// the task, just check to make sure it matches the ending task passed in.
216//
217// gtid: global thread identifier for the calling thread
218// thread: thread info structure containing stack
219// tied_task: the task popped off the stack
220// ending_task: the task that is ending (should match popped task)
221
222static void
223__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
224{
225 // GEH - need to consider what to do if tt_threads_data not allocated yet
226 kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
227 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
228 kmp_taskdata_t *tied_task;
229
230 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
231 return; // Don't pop anything from stack if team or team tasks are serialized
232 }
233
234 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
235 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
236
237 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
238
239 // fix up ts_top if we need to pop from previous block
240 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
241 {
242 kmp_stack_block_t *stack_block =
243 (kmp_stack_block_t *) (task_stack -> ts_top) ;
244
245 stack_block = stack_block -> sb_prev;
246 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
247 }
248
249 // finish bookkeeping
250 task_stack -> ts_top--;
251 task_stack -> ts_entries--;
252
253 tied_task = * (task_stack -> ts_top );
254
255 KMP_DEBUG_ASSERT( tied_task != NULL );
256 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
257 KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
258
259 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
260 return;
261}
262#endif /* BUILD_TIED_TASK_STACK */
263
264//---------------------------------------------------
265// __kmp_push_task: Add a task to the thread's deque
266
267static kmp_int32
268__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
269{
270 kmp_info_t * thread = __kmp_threads[ gtid ];
271 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
272 kmp_task_team_t * task_team = thread->th.th_task_team;
273 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
274 kmp_thread_data_t * thread_data;
275
276 KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
277
278 // The first check avoids building task_team thread data if serialized
279 if ( taskdata->td_flags.task_serial ) {
280 KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
281 gtid, taskdata ) );
282 return TASK_NOT_PUSHED;
283 }
284
285 // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
286 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
287 if ( ! KMP_TASKING_ENABLED(task_team) ) {
288 __kmp_enable_tasking( task_team, thread );
289 }
290 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
291 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
292
293 // Find tasking deque specific to encountering thread
294 thread_data = & task_team -> tt.tt_threads_data[ tid ];
295
296 // No lock needed since only owner can allocate
297 if (thread_data -> td.td_deque == NULL ) {
298 __kmp_alloc_task_deque( thread, thread_data );
299 }
300
301 // Check if deque is full
302 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
303 {
304 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
305 gtid, taskdata ) );
306 return TASK_NOT_PUSHED;
307 }
308
309 // Lock the deque for the task push operation
310 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
311
312 // Must have room since no thread can add tasks but calling thread
313 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
314
315 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
316 // Wrap index.
317 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
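// Note on the wrap above (explanatory only): this assumes the deque size is a power of
// two with TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1; e.g. for a 256-entry deque,
// (255 + 1) & 0xFF == 0, so the tail index wraps back to slot 0.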
318 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
319
320 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
321
322 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
323 "task=%p ntasks=%d head=%u tail=%u\n",
324 gtid, taskdata, thread_data->td.td_deque_ntasks,
325 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
326
327 return TASK_SUCCESSFULLY_PUSHED;
328}
329
330
331//-----------------------------------------------------------------------------------------
332// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
333// this_thr: thread structure to set current_task in.
334
335void
336__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
337{
338 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
339 "curtask_parent=%p\n",
340 0, this_thr, this_thr -> th.th_current_task,
341 this_thr -> th.th_current_task -> td_parent ) );
342
343 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
344
345 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
346 "curtask_parent=%p\n",
347 0, this_thr, this_thr -> th.th_current_task,
348 this_thr -> th.th_current_task -> td_parent ) );
349}
350
351
352//---------------------------------------------------------------------------------------
353// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
354// this_thr: thread structure to set up
355// team: team for implicit task data
356// tid: thread within team to set up
357
358void
359__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
360{
361 // current task of the thread is a parent of the new just created implicit tasks of new team
362 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
363 "parent_task=%p\n",
364 tid, this_thr, this_thr->th.th_current_task,
365 team->t.t_implicit_task_taskdata[tid].td_parent ) );
366
367 KMP_DEBUG_ASSERT (this_thr != NULL);
368
369 if( tid == 0 ) {
370 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
371 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
372 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
373 }
374 } else {
375 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
376 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
377 }
378
379 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
380 "parent_task=%p\n",
381 tid, this_thr, this_thr->th.th_current_task,
382 team->t.t_implicit_task_taskdata[tid].td_parent ) );
383}
384
385
386//----------------------------------------------------------------------
387// __kmp_task_start: bookkeeping for a task starting execution
388// GTID: global thread id of calling thread
389// task: task starting execution
390// current_task: task suspending
391
392static void
393__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
394{
395 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
396 kmp_info_t * thread = __kmp_threads[ gtid ];
397
398 KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
399 gtid, taskdata, current_task) );
400
401 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
402
403 // mark currently executing task as suspended
404 // TODO: GEH - make sure root team implicit task is initialized properly.
405 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
406 current_task -> td_flags.executing = 0;
407
408 // Add task to stack if tied
409#ifdef BUILD_TIED_TASK_STACK
410 if ( taskdata -> td_flags.tiedness == TASK_TIED )
411 {
412 __kmp_push_task_stack( gtid, thread, taskdata );
413 }
414#endif /* BUILD_TIED_TASK_STACK */
415
416 // mark starting task as executing and as current task
417 thread -> th.th_current_task = taskdata;
418
419 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
420 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
421 taskdata -> td_flags.started = 1;
422 taskdata -> td_flags.executing = 1;
423 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
424 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
425
426 // GEH TODO: shouldn't we pass some sort of location identifier here?
427 // APT: yes, we will pass location here.
428 // need to store current thread state (in a thread or taskdata structure)
429 // before setting work_state, otherwise wrong state is set after end of task
430
431 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
432 gtid, taskdata ) );
433
434 return;
435}
436
437
438//----------------------------------------------------------------------
439// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
440// loc_ref: source location information; points to beginning of task block.
441// gtid: global thread number.
442// task: task thunk for the started task.
443
444void
445__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
446{
447 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
448 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
449
450 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
451 gtid, loc_ref, taskdata, current_task ) );
452
453 taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
454 __kmp_task_start( gtid, task, current_task );
455
456 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
457 gtid, loc_ref, taskdata ) );
458
459 return;
460}
461
462#ifdef TASK_UNUSED
463//----------------------------------------------------------------------
464// __kmpc_omp_task_begin: report that a given task has started execution
465// NEVER GENERATED BY COMPILER, DEPRECATED!!!
466
467void
468__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
469{
470 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
471
472 KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
473 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
474
475 __kmp_task_start( gtid, task, current_task );
476
477 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
478 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
479
480 return;
481}
482#endif // TASK_UNUSED
483
484
485//-------------------------------------------------------------------------------------
486// __kmp_free_task: free the current task space and the space for shareds
487// gtid: Global thread ID of calling thread
488// taskdata: task to free
489// thread: thread data structure of caller
490
491static void
492__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
493{
494 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
495 gtid, taskdata) );
496
497 // Check to make sure all flags and counters have the correct values
498 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
499 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
500 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
501 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
502 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
503 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
504
505 taskdata->td_flags.freed = 1;
506 // deallocate the taskdata and shared variable blocks associated with this task
507 #if USE_FAST_MEMORY
508 __kmp_fast_free( thread, taskdata );
509 #else /* ! USE_FAST_MEMORY */
510 __kmp_thread_free( thread, taskdata );
511 #endif
512
513 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
514 gtid, taskdata) );
515}
516
517//-------------------------------------------------------------------------------------
518// __kmp_free_task_and_ancestors: free the current task and ancestors without children
519//
520// gtid: Global thread ID of calling thread
521// taskdata: task to free
522// thread: thread data structure of caller
523
524static void
525__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
526{
527 kmp_int32 children = 0;
528 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
529
530 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
531
532 if ( !team_or_tasking_serialized ) {
533 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
534 KMP_DEBUG_ASSERT( children >= 0 );
535 }
536
537 // Now, go up the ancestor tree to see if any ancestors can now be freed.
538 while ( children == 0 )
539 {
540 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
541
542 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
543 "and freeing itself\n", gtid, taskdata) );
544
545 // --- Deallocate my ancestor task ---
546 __kmp_free_task( gtid, taskdata, thread );
547
548 taskdata = parent_taskdata;
549
550 // Stop checking ancestors at implicit task or if tasking serialized
551 // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
552 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
553 return;
554
555 if ( !team_or_tasking_serialized ) {
556 // Predecrement simulated by "- 1" calculation
557 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
558 KMP_DEBUG_ASSERT( children >= 0 );
559 }
560 }
561
562 KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
563 "not freeing it yet\n", gtid, taskdata, children) );
564}
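// Reference-counting sketch (inferred from the routines above, for orientation only):
// an explicit task's td_allocated_child_tasks starts at 1 (the task itself) and is
// incremented for each child it allocates; __kmp_free_task_and_ancestors decrements the
// count, frees the task only when it reaches zero, and then repeats the same test on the
// parent, so ancestors are reclaimed as soon as their last descendant has been freed.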
565
566//---------------------------------------------------------------------
567// __kmp_task_finish: bookkeeping to do when a task finishes execution
568// gtid: global thread ID for calling thread
569// task: task to be finished
570// resumed_task: task to be resumed. (may be NULL if task is serialized)
571
572static void
573__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
574{
575 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
576 kmp_info_t * thread = __kmp_threads[ gtid ];
577 kmp_int32 children = 0;
578
579 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
580 gtid, taskdata, resumed_task) );
581
582 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
583
584 // Pop task from stack if tied
585#ifdef BUILD_TIED_TASK_STACK
586 if ( taskdata -> td_flags.tiedness == TASK_TIED )
587 {
588 __kmp_pop_task_stack( gtid, thread, taskdata );
589 }
590#endif /* BUILD_TIED_TASK_STACK */
591
592 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
593 taskdata -> td_flags.complete = 1; // mark the task as completed
594 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
595 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
596
597 // Only need to keep track of count if team parallel and tasking not serialized
598 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
599 // Predecrement simulated by "- 1" calculation
600 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
601 KMP_DEBUG_ASSERT( children >= 0 );
602#if OMP_40_ENABLED
603 if ( taskdata->td_taskgroup )
604 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
605 __kmp_release_deps(gtid,taskdata);
606#endif
607 }
608
609 // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
610 // Otherwise, if a task is executed immediately from the release_deps code
611 // the flag will be reset to 1 again by this same function
612 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
613 taskdata -> td_flags.executing = 0; // suspend the finishing task
614
615 KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
616 gtid, taskdata, children) );
617
618#if OMP_40_ENABLED
619 /* If the task's destructor thunk flag has been set, we need to invoke the
620 destructor thunk that has been generated by the compiler.
621 The code is placed here, since at this point other tasks might have been released,
622 hence overlapping the destructor invocations with some other work in the
623 released tasks. The OpenMP spec is not specific on when the destructors are
624 invoked, so we should be free to choose.
625 */
626 if (taskdata->td_flags.destructors_thunk) {
627 kmp_routine_entry_t destr_thunk = task->destructors;
628 KMP_ASSERT(destr_thunk);
629 destr_thunk(gtid, task);
630 }
631#endif // OMP_40_ENABLED
632
633 // bookkeeping for resuming task:
634 // GEH - note tasking_ser => task_serial
635 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
636 taskdata->td_flags.task_serial);
637 if ( taskdata->td_flags.task_serial )
638 {
639 if (resumed_task == NULL) {
640 resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
641 }
642 else {
643 // verify resumed task passed in points to parent
644 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
645 }
646 }
647 else {
648 KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
649 }
650
651 // Free this task and then ancestor tasks if they have no children.
652 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
653
654 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
655
656 // TODO: GEH - make sure root team implicit task is initialized properly.
657 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
658 resumed_task->td_flags.executing = 1; // resume previous task
659
660 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
661 gtid, taskdata, resumed_task) );
662
663 return;
664}
665
666//---------------------------------------------------------------------
667// __kmpc_omp_task_complete_if0: report that a task has completed execution
668// loc_ref: source location information; points to end of task block.
669// gtid: global thread number.
670// task: task thunk for the completed task.
671
672void
673__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
674{
675 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
676 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
677
678 __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
679
680 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
681 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
682
683 return;
684}
685
686#ifdef TASK_UNUSED
687//---------------------------------------------------------------------
688// __kmpc_omp_task_complete: report that a task has completed execution
689// NEVER GENERATED BY COMPILER, DEPRECATED!!!
690
691void
692__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
693{
694 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
695 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
696
697 __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
698
699 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
700 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
701 return;
702}
703#endif // TASK_UNUSED
704
705
706//----------------------------------------------------------------------------------------------------
707// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
708//
709// loc_ref: reference to source location of parallel region
710// this_thr: thread data structure corresponding to implicit task
711// team: team for this_thr
712// tid: thread id of given thread within team
713// set_curr_task: TRUE if need to push current task to thread
714// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
715// TODO: Get better loc_ref. Value passed in may be NULL
716
717void
718__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
719{
720 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
721
722 KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
723 tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
724
725 task->td_task_id = KMP_GEN_TASK_ID();
726 task->td_team = team;
727// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
728 task->td_ident = loc_ref;
729 task->td_taskwait_ident = NULL;
730 task->td_taskwait_counter = 0;
731 task->td_taskwait_thread = 0;
732
733 task->td_flags.tiedness = TASK_TIED;
734 task->td_flags.tasktype = TASK_IMPLICIT;
735 // All implicit tasks are executed immediately, not deferred
736 task->td_flags.task_serial = 1;
737 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
738 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
739
740 task->td_flags.started = 1;
741 task->td_flags.executing = 1;
742 task->td_flags.complete = 0;
743 task->td_flags.freed = 0;
744
745#if OMP_40_ENABLED
746 task->td_dephash = NULL;
747 task->td_depnode = NULL;
748#endif
749
750 if (set_curr_task) { // only do this initialization the first time a thread is created
751 task->td_incomplete_child_tasks = 0;
752 task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
753#if OMP_40_ENABLED
754 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
755#endif
756 __kmp_push_current_task_to_thread( this_thr, team, tid );
757 } else {
758 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
759 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
760 }
761
762 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
763 tid, team, task ) );
764}
765
766// Round up a size to a power of two specified by val
767// Used to insert padding between structures co-allocated using a single malloc() call
768static size_t
769__kmp_round_up_to_val( size_t size, size_t val ) {
770 if ( size & ( val - 1 ) ) {
771 size &= ~ ( val - 1 );
772 if ( size <= KMP_SIZE_T_MAX - val ) {
773 size += val; // Round up if there is no overflow.
774 }; // if
775 }; // if
776 return size;
777} // __kmp_round_up_to_val
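// Illustrative examples (val is expected to be a power of two, as in the caller below):
//   __kmp_round_up_to_val( 18, 8 ) returns 24 (18 rounded up to the next multiple of 8)
//   __kmp_round_up_to_val( 16, 8 ) returns 16 (already aligned, returned unchanged)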
778
779
780//---------------------------------------------------------------------------------
781// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
782//
783// loc_ref: source location information
784// gtid: global thread number.
785// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
786// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
787// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
788// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
789// task_entry: Pointer to task code entry point generated by compiler.
790// returns: a pointer to the allocated kmp_task_t structure (task).
791
792kmp_task_t *
793__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
794 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
795 kmp_routine_entry_t task_entry )
796{
797 kmp_task_t *task;
798 kmp_taskdata_t *taskdata;
799 kmp_info_t *thread = __kmp_threads[ gtid ];
800 kmp_team_t *team = thread->th.th_team;
801 kmp_taskdata_t *parent_task = thread->th.th_current_task;
802 size_t shareds_offset;
803
804 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
805 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
806 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
807 sizeof_shareds, task_entry) );
808
809 if ( parent_task->td_flags.final ) {
810 if (flags->merged_if0) {
811 }
812 flags->final = 1;
813 }
814
815 // Calculate shared structure offset including padding after kmp_task_t struct
816 // to align pointers in shared struct
817 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
818 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
819
820 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
821 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
822 gtid, shareds_offset) );
823 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
824 gtid, sizeof_shareds) );
825
826 // Avoid double allocation here by combining shareds with taskdata
827 #if USE_FAST_MEMORY
828 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
829 #else /* ! USE_FAST_MEMORY */
830 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
831 #endif /* USE_FAST_MEMORY */
832
833 task = KMP_TASKDATA_TO_TASK(taskdata);
834
835 // Make sure task & taskdata are aligned appropriately
836#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
837 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
838 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
839#else
840 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
841 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
842#endif
843 if (sizeof_shareds > 0) {
844 // Avoid double allocation here by combining shareds with taskdata
845 task->shareds = & ((char *) taskdata)[ shareds_offset ];
846 // Make sure shareds struct is aligned to pointer size
847 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
848 } else {
849 task->shareds = NULL;
850 }
851 task->routine = task_entry;
852 task->part_id = 0; // AC: Always start with 0 part id
853
854 taskdata->td_task_id = KMP_GEN_TASK_ID();
855 taskdata->td_team = team;
856 taskdata->td_alloc_thread = thread;
857 taskdata->td_parent = parent_task;
858 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
859 taskdata->td_ident = loc_ref;
860 taskdata->td_taskwait_ident = NULL;
861 taskdata->td_taskwait_counter = 0;
862 taskdata->td_taskwait_thread = 0;
863 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
864 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
865
866 taskdata->td_flags.tiedness = flags->tiedness;
867 taskdata->td_flags.final = flags->final;
868 taskdata->td_flags.merged_if0 = flags->merged_if0;
869#if OMP_40_ENABLED
870 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
871#endif // OMP_40_ENABLED
872 taskdata->td_flags.tasktype = TASK_EXPLICIT;
873
874 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
875 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
876
877 // GEH - TODO: fix this to copy parent task's value of team_serial flag
878 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
879
880 // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
881 // tasks are not left until program termination to execute. Also, it helps locality to execute
882 // immediately.
883 taskdata->td_flags.task_serial = ( taskdata->td_flags.final
884 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
885
886 taskdata->td_flags.started = 0;
887 taskdata->td_flags.executing = 0;
888 taskdata->td_flags.complete = 0;
889 taskdata->td_flags.freed = 0;
890
891 taskdata->td_flags.native = flags->native;
892
893 taskdata->td_incomplete_child_tasks = 0;
894 taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
895#if OMP_40_ENABLED
896 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
897 taskdata->td_dephash = NULL;
898 taskdata->td_depnode = NULL;
899#endif
900 // Only need to keep track of child task counts if team parallel and tasking not serialized
901 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
902 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
903#if OMP_40_ENABLED
904 if ( parent_task->td_taskgroup )
905 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
906#endif
907 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
908 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
909 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
910 }
911 }
912
913 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
914 gtid, taskdata, taskdata->td_parent) );
915
916 return task;
917}
918
919
920kmp_task_t *
921__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
922 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
923 kmp_routine_entry_t task_entry )
924{
925 kmp_task_t *retval;
926 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
927
928 input_flags->native = FALSE;
929 // __kmp_task_alloc() sets up all other runtime flags
930
931 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
932 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
933 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
934 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
935
936 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
937 sizeof_shareds, task_entry );
938
939 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
940
941 return retval;
942}
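// Typical calling sequence (illustrative sketch only; the real calls are generated by the
// compiler for '#pragma omp task', and the exact flag encoding below is an assumption):
//
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /*flags=*/1 /*tied*/,
//                                            sizeof_kmp_task_t, sizeof_shareds, &task_entry );
//     ... fill in t->shareds with the addresses of the shared variables ...
//     __kmpc_omp_task( &loc, gtid, t );                 // deferrable task
//
// or, for an if(0) / undeferred task:
//
//     __kmpc_omp_task_begin_if0( &loc, gtid, t );
//     task_entry( gtid, t );                            // task body runs inline
//     __kmpc_omp_task_complete_if0( &loc, gtid, t );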
943
944//-----------------------------------------------------------
945// __kmp_invoke_task: invoke the specified task
946//
947// gtid: global thread ID of caller
948// task: the task to invoke
949// current_task: the task to resume after task invocation
950
951static void
952__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
953{
954 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
955#if OMP_40_ENABLED
956 int discard = 0 /* false */;
957#endif
958 KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
959 gtid, taskdata, current_task) );
960
961 __kmp_task_start( gtid, task, current_task );
962
963#if OMP_40_ENABLED
964 // TODO: cancel tasks if the parallel region has also been cancelled
965 // TODO: check if this sequence can be hoisted above __kmp_task_start
966 // if cancellation has been enabled for this run ...
967 if (__kmp_omp_cancellation) {
968 kmp_info_t *this_thr = __kmp_threads [ gtid ];
969 kmp_team_t * this_team = this_thr->th.th_team;
970 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
971 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
972 // this task belongs to a task group and we need to cancel it
973 discard = 1 /* true */;
974 }
975 }
976
977 //
978 // Invoke the task routine and pass in relevant data.
979 // Thunks generated by gcc take a different argument list.
980 //
981 if (!discard) {
982#endif // OMP_40_ENABLED
983#ifdef KMP_GOMP_COMPAT
984 if (taskdata->td_flags.native) {
985 ((void (*)(void *))(*(task->routine)))(task->shareds);
986 }
987 else
988#endif /* KMP_GOMP_COMPAT */
989 {
990 (*(task->routine))(gtid, task);
991 }
992#if OMP_40_ENABLED
993 }
994#endif // OMP_40_ENABLED
995
996 __kmp_task_finish( gtid, task, current_task );
997
998 KA_TRACE(30, ("__kmp_inovke_task(exit): T#%d completed task %p, resuming task %p\n",
999 gtid, taskdata, current_task) );
1000 return;
1001}
1002
1003//-----------------------------------------------------------------------
1004// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1005//
1006// loc_ref: location of original task pragma (ignored)
1007// gtid: Global Thread ID of encountering thread
1008// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1009// Returns:
1010// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1011// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1012
1013kmp_int32
1014__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1015{
1016 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1017
1018 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1019 gtid, loc_ref, new_taskdata ) );
1020
1021 /* Should we execute the new task or queue it? For now, let's just always try to
1022 queue it. If the queue fills up, then we'll execute it. */
1023
1024 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1025 { // Execute this task immediately
1026 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1027 new_taskdata->td_flags.task_serial = 1;
1028 __kmp_invoke_task( gtid, new_task, current_task );
1029 }
1030
1031 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1032 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1033 new_taskdata ) );
1034
1035 return TASK_CURRENT_NOT_QUEUED;
1036}
1037
1038//---------------------------------------------------------------------
1039// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1040// gtid: Global Thread ID of encountering thread
1041// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1042// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1043// returns:
1044//
1045// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1046// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1047kmp_int32
1048__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1049{
1050 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1051
1052 /* Should we execute the new task or queue it? For now, let's just always try to
1053 queue it. If the queue fills up, then we'll execute it. */
1054
1055 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1056 { // Execute this task immediately
1057 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1058 if ( serialize_immediate )
1059 new_taskdata -> td_flags.task_serial = 1;
1060 __kmp_invoke_task( gtid, new_task, current_task );
1061 }
1062
1063
1064 return TASK_CURRENT_NOT_QUEUED;
1065}
1066
1067//---------------------------------------------------------------------
1068// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1069// the parent thread only!
1070// loc_ref: location of original task pragma (ignored)
1071// gtid: Global Thread ID of encountering thread
1072// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1073// returns:
1074//
1075// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1076// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1077
1078kmp_int32
1079__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1080{
1081 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1082 kmp_int32 res;
1083
1084 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1085 gtid, loc_ref, new_taskdata ) );
1086
1087 res = __kmp_omp_task(gtid,new_task,true);
1088
1089 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1090 gtid, loc_ref, new_taskdata ) );
1091 return res;
1092}
1093
1094//-------------------------------------------------------------------------------------
1095// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1096
1097kmp_int32
1098__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1099{
1100 kmp_taskdata_t * taskdata;
1101 kmp_info_t * thread;
1102 int thread_finished = FALSE;
1103
1104 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1105 gtid, loc_ref) );
1106
1107 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1108 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1109
1110 thread = __kmp_threads[ gtid ];
1111 taskdata = thread -> th.th_current_task;
1112#if USE_ITT_BUILD
1113 // Note: These values are used by ITT events as well.
1114#endif /* USE_ITT_BUILD */
1115 taskdata->td_taskwait_counter += 1;
1116 taskdata->td_taskwait_ident = loc_ref;
1117 taskdata->td_taskwait_thread = gtid + 1;
1118
1119#if USE_ITT_BUILD
1120 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1121 if ( itt_sync_obj != NULL )
1122 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1123#endif /* USE_ITT_BUILD */
1124
1125 if ( ! taskdata->td_flags.team_serial ) {
1126 // GEH: if team serialized, avoid reading the volatile variable below.
1127 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1128 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1129 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1130 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1131 }
1132 }
1133#if USE_ITT_BUILD
1134 if ( itt_sync_obj != NULL )
1135 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1136#endif /* USE_ITT_BUILD */
1137
1138 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1139 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1140 }
1141
1142 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1143 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1144
1145 return TASK_CURRENT_NOT_QUEUED;
1146}
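// Usage sketch (illustrative, not taken from this file): a bare '#pragma omp taskwait'
// is lowered to a single runtime call of the form
//     __kmpc_omp_taskwait( &loc, gtid );
// where gtid is the value the compiler obtained earlier, e.g. via __kmpc_global_thread_num().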
1147
1148
1149//-------------------------------------------------
1150// __kmpc_omp_taskyield: switch to a different task
1151
1152kmp_int32
1153__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1154{
1155 kmp_taskdata_t * taskdata;
1156 kmp_info_t * thread;
1157 int thread_finished = FALSE;
1158
1159 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1160 gtid, loc_ref, end_part) );
1161
1162 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1163 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1164
1165 thread = __kmp_threads[ gtid ];
1166 taskdata = thread -> th.th_current_task;
1167 // Should we model this as a task wait or not?
1168#if USE_ITT_BUILD
1169 // Note: These values are used by ITT events as well.
1170#endif /* USE_ITT_BUILD */
1171 taskdata->td_taskwait_counter += 1;
1172 taskdata->td_taskwait_ident = loc_ref;
1173 taskdata->td_taskwait_thread = gtid + 1;
1174
1175#if USE_ITT_BUILD
1176 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1177 if ( itt_sync_obj != NULL )
1178 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1179#endif /* USE_ITT_BUILD */
1180 if ( ! taskdata->td_flags.team_serial ) {
1181 kmp_task_team_t * task_team = thread->th.th_task_team;
1182 if (task_team != NULL) {
1183 if (KMP_TASKING_ENABLED(task_team)) {
1184 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1185 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1186 }
1187 }
1188 }
1189#if USE_ITT_BUILD
1190 if ( itt_sync_obj != NULL )
1191 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1192#endif /* USE_ITT_BUILD */
1193
1194 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1195 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1196 }
1197
1198 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1199 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1200
1201 return TASK_CURRENT_NOT_QUEUED;
1202}
1203
1204
1205#if OMP_40_ENABLED
1206//-------------------------------------------------------------------------------------
1207// __kmpc_taskgroup: Start a new taskgroup
1208
1209void
1210__kmpc_taskgroup( ident_t* loc, int gtid )
1211{
1212 kmp_info_t * thread = __kmp_threads[ gtid ];
1213 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1214 kmp_taskgroup_t * tg_new =
1215 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1216 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1217 tg_new->count = 0;
1218 tg_new->cancel_request = cancel_noreq;
1219 tg_new->parent = taskdata->td_taskgroup;
1220 taskdata->td_taskgroup = tg_new;
1221}
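// Construct mapping (illustrative sketch, based on how these entry points are paired here):
//     __kmpc_taskgroup( &loc, gtid );
//     ... structured block, possibly creating tasks ...
//     __kmpc_end_taskgroup( &loc, gtid );   // waits until the group's count drops to zero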
1222
1223
1224//-------------------------------------------------------------------------------------
1225// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1226// and its descendants are complete
1227
1228void
1229__kmpc_end_taskgroup( ident_t* loc, int gtid )
1230{
1231 kmp_info_t * thread = __kmp_threads[ gtid ];
1232 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1233 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1234 int thread_finished = FALSE;
1235
1236 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1237 KMP_DEBUG_ASSERT( taskgroup != NULL );
1238
1239 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1240#if USE_ITT_BUILD
1241 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1242 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1243 if ( itt_sync_obj != NULL )
1244 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1245#endif /* USE_ITT_BUILD */
1246
1247 if ( ! taskdata->td_flags.team_serial ) {
1248 kmp_flag_32 flag(&(taskgroup->count), 0U);
1249 while ( TCR_4(taskgroup->count) != 0 ) {
1250 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1251 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1252 }
1253 }
1254
1255#if USE_ITT_BUILD
1256 if ( itt_sync_obj != NULL )
1257 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1258#endif /* USE_ITT_BUILD */
1259 }
1260 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1261
1262 // Restore parent taskgroup for the current task
1263 taskdata->td_taskgroup = taskgroup->parent;
1264 __kmp_thread_free( thread, taskgroup );
1265
1266 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1267}
1268#endif
1269
1270
1271//------------------------------------------------------
1272// __kmp_remove_my_task: remove a task from my own deque
1273
1274static kmp_task_t *
1275__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1276 kmp_int32 is_constrained )
1277{
1278 kmp_task_t * task;
1279 kmp_taskdata_t * taskdata;
1280 kmp_thread_data_t *thread_data;
1281 kmp_uint32 tail;
1282
1283 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1284 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1285
1286 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1287
1288 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1289 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1290 thread_data->td.td_deque_tail) );
1291
1292 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1293 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1294 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1295 thread_data->td.td_deque_tail) );
1296 return NULL;
1297 }
1298
1299 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1300
1301 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1302 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1303 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1304 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1305 thread_data->td.td_deque_tail) );
1306 return NULL;
1307 }
1308
1309 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1310 taskdata = thread_data -> td.td_deque[ tail ];
1311
1312 if (is_constrained) {
1313 // we need to check if the candidate obeys task scheduling constraint:
1314 // only child of current task can be scheduled
1315 kmp_taskdata_t * current = thread->th.th_current_task;
1316 kmp_int32 level = current->td_level;
1317 kmp_taskdata_t * parent = taskdata->td_parent;
1318 while ( parent != current && parent->td_level > level ) {
1319 parent = parent->td_parent; // check generation up to the level of the current task
1320 KMP_DEBUG_ASSERT(parent != NULL);
1321 }
1322 if ( parent != current ) {
1323 // If the tail task is not a child, then no other children can appear in the deque.
1324 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1325 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1326 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1327 thread_data->td.td_deque_tail) );
1328 return NULL;
1329 }
1330 }
1331
1332 thread_data -> td.td_deque_tail = tail;
1333 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1334
1335 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1336
1337 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1338 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1339 thread_data->td.td_deque_tail) );
1340
1341 task = KMP_TASKDATA_TO_TASK( taskdata );
1342 return task;
1343}
1344
1345
1346//-----------------------------------------------------------
1347// __kmp_steal_task: remove a task from another thread's deque
1348// Assumes that the calling thread has already checked for the existence of
1349// the task_team thread_data before calling this routine.
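//
// A note on the stealing policy as implemented below: an unconstrained steal
// takes the oldest task from the head of the victim's deque, while a
// constrained steal (task scheduling constraint in effect) takes from the
// tail, and only after verifying that the candidate is a descendant of the
// stealing thread's current task.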
1350
1351static kmp_task_t *
1352__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1353 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1354 kmp_int32 is_constrained )
1355{
1356 kmp_task_t * task;
1357 kmp_taskdata_t * taskdata;
1358 kmp_thread_data_t *victim_td, *threads_data;
1359 kmp_int32 victim_tid, thread_tid;
1360
1361 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1362
1363 threads_data = task_team -> tt.tt_threads_data;
1364 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1365
1366 victim_tid = victim->th.th_info.ds.ds_tid;
1367 victim_td = & threads_data[ victim_tid ];
1368
1369 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1370 "head=%u tail=%u\n",
1371 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1372 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1373
1374 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1375 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1376 {
1377 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1378 "ntasks=%d head=%u tail=%u\n",
1379 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1380 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1381 return NULL;
1382 }
1383
1384 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1385
1386 // Check again after we acquire the lock
1387 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1388 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1389 {
1390 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1391 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1392 "ntasks=%d head=%u tail=%u\n",
1393 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1394 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1395 return NULL;
1396 }
1397
1398 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1399
1400 if ( !is_constrained ) {
1401 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1402 // Bump head pointer and Wrap.
1403 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1404 } else {
1405        // While we have postponed tasks, let's steal from the tail of the deque (smaller tasks).
1406 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1407 taskdata = victim_td -> td.td_deque[ tail ];
1408        // we need to check if the candidate obeys the task scheduling constraint (TSC):
1409        // it must be a descendant of the stealing thread's current task to be scheduled
1410 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1411 kmp_int32 level = current->td_level;
1412 kmp_taskdata_t * parent = taskdata->td_parent;
1413 while ( parent != current && parent->td_level > level ) {
1414 parent = parent->td_parent; // check generation up to the level of the current task
1415 KMP_DEBUG_ASSERT(parent != NULL);
1416 }
1417 if ( parent != current ) {
1418            // If the tail task is not a descendant, then no other descendants can appear in the deque (?).
1419 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1420 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1421 "ntasks=%d head=%u tail=%u\n",
1422 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1423 task_team, victim_td->td.td_deque_ntasks,
1424 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1425 return NULL;
1426 }
1427 victim_td -> td.td_deque_tail = tail;
1428 }
1429 if (*thread_finished) {
1430 // We need to un-mark this victim as a finished victim. This must be done before
1431 // releasing the lock, or else other threads (starting with the master victim)
1432 // might be prematurely released from the barrier!!!
1433 kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1434
1435 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1436 gtid, count + 1, task_team) );
1437
1438 *thread_finished = FALSE;
1439 }
1440 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1441
1442 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1443
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001444 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001445 "ntasks=%d head=%u tail=%u\n",
1446 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1447 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1448 victim_td->td.td_deque_tail) );
1449
1450 task = KMP_TASKDATA_TO_TASK( taskdata );
1451 return task;
1452}
1453
1454
1455//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001456// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001457// is satisfied (return true) or there are none left (return false).
1458// final_spin is TRUE if this is the spin at the release barrier.
1459// thread_finished indicates whether the thread is finished executing all
1460// the tasks it has on its deque, and is at the release barrier.
1461// flag is the flag object that wraps both the location on which to spin and
1462// the value to check to terminate the spin.
1463// flag == NULL means only execute a single task and return.
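//
// A rough outline of the scheduling loop below (a reading of the code, not a
// normative description):
//
//   start:
//     1) drain our own deque via __kmp_remove_my_task();
//     2) steal repeatedly from the last successful victim
//        (td_deque_last_stolen), restarting at 1) whenever a stolen task
//        spawned new work into our own deque;
//     3) otherwise pick a random victim; if it is sleeping, wake it and pick
//        another; steal from it, again restarting at 1) as needed.
//
// In final_spin mode, after each phase runs dry the routine decrements
// tt_unfinished_threads once (guarded by *thread_finished) and re-checks the
// flag, since that decrement may itself satisfy the termination condition.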
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001464template <class C>
1465static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1466 int *thread_finished
1467 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001468{
1469 kmp_task_team_t * task_team;
1470 kmp_team_t * team;
1471 kmp_thread_data_t * threads_data;
1472 kmp_task_t * task;
1473 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1474 volatile kmp_uint32 * unfinished_threads;
1475 kmp_int32 nthreads, last_stolen, k, tid;
1476
1477 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1478 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1479
1480 task_team = thread -> th.th_task_team;
1481 KMP_DEBUG_ASSERT( task_team != NULL );
1482
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001483 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001484 gtid, final_spin, *thread_finished) );
1485
1486 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1487 KMP_DEBUG_ASSERT( threads_data != NULL );
1488
1489 nthreads = task_team -> tt.tt_nproc;
1490 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1491 KMP_DEBUG_ASSERT( nthreads > 1 );
1492 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1493
1494 // Choose tasks from our own work queue.
1495 start:
1496 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1497#if USE_ITT_BUILD && USE_ITT_NOTIFY
1498 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1499 if ( itt_sync_obj == NULL ) {
1500 // we are at fork barrier where we could not get the object reliably
1501 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1502 }
1503 __kmp_itt_task_starting( itt_sync_obj );
1504 }
1505#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1506 __kmp_invoke_task( gtid, task, current_task );
1507#if USE_ITT_BUILD
1508 if ( itt_sync_obj != NULL )
1509 __kmp_itt_task_finished( itt_sync_obj );
1510#endif /* USE_ITT_BUILD */
1511
1512 // If this thread is only partway through the barrier and the condition
1513 // is met, then return now, so that the barrier gather/release pattern can proceed.
1514 // If this thread is in the last spin loop in the barrier, waiting to be
1515    // released, we know that the termination condition will not be satisfied,
1516 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001517 if (flag == NULL || (!final_spin && flag->done_check())) {
1518 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001519 return TRUE;
1520 }
1521 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1522 }
1523
1524 // This thread's work queue is empty. If we are in the final spin loop
1525 // of the barrier, check and see if the termination condition is satisfied.
1526 if (final_spin) {
1527 // First, decrement the #unfinished threads, if that has not already
1528 // been done. This decrement might be to the spin location, and
1529 // result in the termination condition being satisfied.
1530 if (! *thread_finished) {
1531 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001532 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001533 gtid, count, task_team) );
1534 *thread_finished = TRUE;
1535 }
1536
1537 // It is now unsafe to reference thread->th.th_team !!!
1538 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1539 // thread to pass through the barrier, where it might reset each thread's
1540 // th.th_team field for the next parallel region.
1541 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001542 if (flag != NULL && flag->done_check()) {
1543 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001544 return TRUE;
1545 }
1546 }
1547
1548 // Try to steal from the last place I stole from successfully.
1549 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1550 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1551
1552 if (last_stolen != -1) {
1553 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1554
1555 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1556 thread_finished, is_constrained )) != NULL)
1557 {
1558#if USE_ITT_BUILD && USE_ITT_NOTIFY
1559 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1560 if ( itt_sync_obj == NULL ) {
1561 // we are at fork barrier where we could not get the object reliably
1562 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1563 }
1564 __kmp_itt_task_starting( itt_sync_obj );
1565 }
1566#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1567 __kmp_invoke_task( gtid, task, current_task );
1568#if USE_ITT_BUILD
1569 if ( itt_sync_obj != NULL )
1570 __kmp_itt_task_finished( itt_sync_obj );
1571#endif /* USE_ITT_BUILD */
1572
1573 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001574 if (flag == NULL || (!final_spin && flag->done_check())) {
1575 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576 gtid) );
1577 return TRUE;
1578 }
1579
1580 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1581 // If the execution of the stolen task resulted in more tasks being
1582 // placed on our run queue, then restart the whole process.
1583 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001584 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001585 gtid) );
1586 goto start;
1587 }
1588 }
1589
1590 // Don't give priority to stealing from this thread anymore.
1591 threads_data[ tid ].td.td_deque_last_stolen = -1;
1592
1593        // The victim's work queue is empty.  If we are in the final spin loop
1594 // of the barrier, check and see if the termination condition is satisfied.
1595 if (final_spin) {
1596 // First, decrement the #unfinished threads, if that has not already
1597 // been done. This decrement might be to the spin location, and
1598 // result in the termination condition being satisfied.
1599 if (! *thread_finished) {
1600 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001601 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001602 "task_team=%p\n", gtid, count, task_team) );
1603 *thread_finished = TRUE;
1604 }
1605
1606 // If __kmp_tasking_mode != tskm_immediate_exec
1607 // then it is now unsafe to reference thread->th.th_team !!!
1608 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1609 // thread to pass through the barrier, where it might reset each thread's
1610 // th.th_team field for the next parallel region.
1611 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001612 if (flag != NULL && flag->done_check()) {
1613 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001614 gtid) );
1615 return TRUE;
1616 }
1617 }
1618 }
1619
1620 // Find a different thread to steal work from. Pick a random thread.
1621 // My initial plan was to cycle through all the threads, and only return
1622 // if we tried to steal from every thread, and failed. Arch says that's
1623 // not such a great idea.
1624 // GEH - need yield code in this loop for throughput library mode?
1625 new_victim:
1626 k = __kmp_get_random( thread ) % (nthreads - 1);
1627 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1628 ++k; // Adjusts random distribution to exclude self
1629 }
1630 {
1631 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1632 int first;
1633
1634 // There is a slight chance that __kmp_enable_tasking() did not wake up
1635        // all threads waiting at the barrier.  If this thread is sleeping, then
1636        // wake it up.  Since we were going to pay the cache miss penalty
1637        // for referencing another thread's kmp_info_t struct anyway, the check
1638 // shouldn't cost too much performance at this point.
1639        // In extra barrier mode, threads do not sleep at the separate tasking
1640 // barrier, so this isn't a problem.
1641 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1642 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1643 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1644 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001645 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001646            // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001647 // There is a slight possibility that it resumes, steals a task from
Jim Cownie5e8470a2013-09-27 10:38:44 +00001648            // another thread, which spawns more tasks, all in the time that it takes
1649 // this thread to check => don't write an assertion that the victim's
1650 // queue is empty. Try stealing from a different thread.
1651 goto new_victim;
1652 }
1653
1654 // Now try to steal work from the selected thread
1655 first = TRUE;
1656 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1657 thread_finished, is_constrained )) != NULL)
1658 {
1659#if USE_ITT_BUILD && USE_ITT_NOTIFY
1660 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1661 if ( itt_sync_obj == NULL ) {
1662 // we are at fork barrier where we could not get the object reliably
1663 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1664 }
1665 __kmp_itt_task_starting( itt_sync_obj );
1666 }
1667#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1668 __kmp_invoke_task( gtid, task, current_task );
1669#if USE_ITT_BUILD
1670 if ( itt_sync_obj != NULL )
1671 __kmp_itt_task_finished( itt_sync_obj );
1672#endif /* USE_ITT_BUILD */
1673
1674 // Try stealing from this victim again, in the future.
1675 if (first) {
1676 threads_data[ tid ].td.td_deque_last_stolen = k;
1677 first = FALSE;
1678 }
1679
1680 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001681 if (flag == NULL || (!final_spin && flag->done_check())) {
1682 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001683 gtid) );
1684 return TRUE;
1685 }
1686 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1687
1688 // If the execution of the stolen task resulted in more tasks being
1689 // placed on our run queue, then restart the whole process.
1690 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001691 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001692 gtid) );
1693 goto start;
1694 }
1695 }
1696
1697        // The victim's work queue is empty.  If we are in the final spin loop
1698 // of the barrier, check and see if the termination condition is satisfied.
1699 // Going on and finding a new victim to steal from is expensive, as it
1700 // involves a lot of cache misses, so we definitely want to re-check the
1701 // termination condition before doing that.
1702 if (final_spin) {
1703 // First, decrement the #unfinished threads, if that has not already
1704 // been done. This decrement might be to the spin location, and
1705 // result in the termination condition being satisfied.
1706 if (! *thread_finished) {
1707 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001708 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709 "task_team=%p\n",
1710 gtid, count, task_team) );
1711 *thread_finished = TRUE;
1712 }
1713
1714 // If __kmp_tasking_mode != tskm_immediate_exec,
1715 // then it is now unsafe to reference thread->th.th_team !!!
1716 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1717 // thread to pass through the barrier, where it might reset each thread's
1718 // th.th_team field for the next parallel region.
1719 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001720 if (flag != NULL && flag->done_check()) {
1721 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001722 return TRUE;
1723 }
1724 }
1725 }
1726
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001727 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001728 return FALSE;
1729}
1730
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001731int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1732 int *thread_finished
1733 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1734{
1735 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1736 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1737}
1738
1739int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1740 int *thread_finished
1741 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1742{
1743 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1744 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1745}
1746
1747int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1748 int *thread_finished
1749 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1750{
1751 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1752 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1753}
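
// The three wrappers above simply instantiate __kmp_execute_tasks_template
// for the 32-bit, 64-bit, and oncore flag types.  A sketch of a typical call
// site, modeled on __kmp_tasking_barrier() later in this file, drives them
// through the flag object's execute_tasks() method rather than calling them
// directly:
//
//   kmp_flag_32 spin_flag(spin, 0U);
//   while ( !spin_flag.execute_tasks(thread, gtid, TRUE, &flag
//                                    USE_ITT_BUILD_ARG(NULL), 0) ) {
//       ... // keep executing tasks until the spin condition is satisfied
//   }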
1754
1755
Jim Cownie5e8470a2013-09-27 10:38:44 +00001756
1757//-----------------------------------------------------------------------------
1758// __kmp_enable_tasking: Allocate the task team's threads_data array and resume
1759// threads sleeping at the next barrier so they can assist in executing enqueued tasks.
1760// The first thread in allocates the array atomically (under tt_threads_lock).
1761
1762static void
1763__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1764{
1765 kmp_team_t *team = this_thr->th.th_team;
1766 kmp_thread_data_t *threads_data;
1767 int nthreads, i, is_init_thread;
1768
1769 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1770 __kmp_gtid_from_thread( this_thr ) ) );
1771
1772 KMP_DEBUG_ASSERT(task_team != NULL);
1773 KMP_DEBUG_ASSERT(team != NULL);
1774
1775 nthreads = task_team->tt.tt_nproc;
1776 KMP_DEBUG_ASSERT(nthreads > 0);
1777 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1778
1779 // Allocate or increase the size of threads_data if necessary
1780 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1781
1782 if (!is_init_thread) {
1783 // Some other thread already set up the array.
1784 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1785 __kmp_gtid_from_thread( this_thr ) ) );
1786 return;
1787 }
1788 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1789 KMP_DEBUG_ASSERT( threads_data != NULL );
1790
1791 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1792 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1793 {
1794 // Release any threads sleeping at the barrier, so that they can steal
1795        // tasks and execute them.  In extra barrier mode, threads do not sleep
1796 // at the separate tasking barrier, so this isn't a problem.
1797 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001799 kmp_info_t *thread = threads_data[i].td.td_thr;
1800
1801 if (i == this_thr->th.th_info.ds.ds_tid) {
1802 continue;
1803 }
1804 // Since we haven't locked the thread's suspend mutex lock at this
1805 // point, there is a small window where a thread might be putting
1806 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001807 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808        // to see if other threads are sleeping (using the same random
1809 // mechanism that is used for task stealing) and awakens them if
1810 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001811 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812 {
1813 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1814 __kmp_gtid_from_thread( this_thr ),
1815 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001816 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001817 }
1818 else {
1819 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1820 __kmp_gtid_from_thread( this_thr ),
1821 __kmp_gtid_from_thread( thread ) ) );
1822 }
1823 }
1824 }
1825
1826 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1827 __kmp_gtid_from_thread( this_thr ) ) );
1828}
1829
1830
1831/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001832/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00001833 * Utility routines for "task teams".  A task team (kmp_task_team_t) is kind of
1834 * like a shadow of the kmp_team_t data struct, with a different lifetime.
1835 * After a child thread checks into a barrier and calls __kmp_release() from
1836 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1837 * longer assume that the kmp_team_t structure is intact (at any moment, the
1838 * master thread may exit the barrier code and free the team data structure,
1839 * and return the threads to the thread pool).
1840 *
1841 * This does not work with the tasking code, as the thread is still
1842 * expected to participate in the execution of any tasks that may have been
1843 * spawned by a member of the team, and the thread still needs access
1844 * to each thread in the team, so that it can steal work from it.
1845 *
1846 * Enter the existence of the kmp_task_team_t struct. It employs a reference
1847 * counting mechanism, and is allocated by the master thread before calling
1848 * __kmp_<barrier_kind>_release, and then is released by the last thread to
1849 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1850 * of the kmp_task_team_t structs for consecutive barriers can overlap
1851 * (and will, unless the master thread is the last thread to exit the barrier
1852 * release phase, which is not typical).
1853 *
1854 * The existence of such a struct is useful outside the context of tasking,
1855 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1856 * so that any performance differences show up when comparing the 2.5 vs. 3.0
1857 * libraries.
1858 *
1859 * We currently use the existence of the threads array as an indicator that
1860 * tasks were spawned since the last barrier. If the structure is to be
1861 * useful outside the context of tasking, then this will have to change, but
1862 * not setting the field minimizes the performance impact of tasking on
1863 * barriers, when no explicit tasks were spawned (pushed, actually).
1864 */
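
/*
 * A sketch of the intended reference-counting lifecycle, as read from the
 * code below (the exact values are set in __kmp_allocate_task_team() and
 * decremented in __kmp_unref_task_team()):
 *
 *   master:       task_team = __kmp_allocate_task_team(...);  // tt_ref_ct = nproc - 1
 *   each worker:  __kmp_unref_task_team(task_team, thread);   // atomic decrement
 *   last worker:  tt_ref_ct reaches 0 => __kmp_free_task_team() puts the
 *                 struct back on the global free list
 *
 * The master thread is deliberately not counted; it deactivates the struct
 * in __kmp_task_team_wait() instead.
 */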
1865
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001866
Jim Cownie5e8470a2013-09-27 10:38:44 +00001867static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1868// Lock for task team data structures
1869static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1870
1871
1872//------------------------------------------------------------------------------
1873// __kmp_alloc_task_deque:
1874//  Allocates a task deque for a particular thread, and initializes the necessary
1875// data structures relating to the deque. This only happens once per thread
1876// per task team since task teams are recycled.
1877// No lock is needed during allocation since each thread allocates its own
1878// deque.
1879
1880static void
1881__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1882{
1883 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1884 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1885
1886 // Initialize last stolen task field to "none"
1887 thread_data -> td.td_deque_last_stolen = -1;
1888
1889 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1890 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1891 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1892
1893 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1894 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1895 // Allocate space for task deque, and zero the deque
1896 // Cannot use __kmp_thread_calloc() because threads not around for
1897 // kmp_reap_task_team( ).
1898 thread_data -> td.td_deque = (kmp_taskdata_t **)
1899 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1900}
1901
1902
1903//------------------------------------------------------------------------------
1904// __kmp_free_task_deque:
1905// Deallocates a task deque for a particular thread.
1906// Happens at library deallocation so don't need to reset all thread data fields.
1907
1908static void
1909__kmp_free_task_deque( kmp_thread_data_t *thread_data )
1910{
1911 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1912
1913 if ( thread_data -> td.td_deque != NULL ) {
1914 TCW_4(thread_data -> td.td_deque_ntasks, 0);
1915 __kmp_free( thread_data -> td.td_deque );
1916 thread_data -> td.td_deque = NULL;
1917 }
1918 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1919
1920#ifdef BUILD_TIED_TASK_STACK
1921 // GEH: Figure out what to do here for td_susp_tied_tasks
1922 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1923 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1924 }
1925#endif // BUILD_TIED_TASK_STACK
1926}
1927
1928
1929//------------------------------------------------------------------------------
1930// __kmp_realloc_task_threads_data:
1931// Allocates a threads_data array for a task team, either by allocating an initial
1932// array or enlarging an existing array. Only the first thread to get the lock
1933//  allocates or enlarges the array and re-initializes the array elements.
1934// That thread returns "TRUE", the rest return "FALSE".
1935// Assumes that the new array size is given by task_team -> tt.tt_nproc.
1936// The current size is given by task_team -> tt.tt_max_threads.
1937
1938static int
1939__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1940{
1941 kmp_thread_data_t ** threads_data_p;
1942 kmp_int32 nthreads, maxthreads;
1943 int is_init_thread = FALSE;
1944
1945 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1946 // Already reallocated and initialized.
1947 return FALSE;
1948 }
1949
1950 threads_data_p = & task_team -> tt.tt_threads_data;
1951 nthreads = task_team -> tt.tt_nproc;
1952 maxthreads = task_team -> tt.tt_max_threads;
1953
1954 // All threads must lock when they encounter the first task of the implicit task
1955 // region to make sure threads_data fields are (re)initialized before used.
1956 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1957
1958 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1959 // first thread to enable tasking
1960 kmp_team_t *team = thread -> th.th_team;
1961 int i;
1962
1963 is_init_thread = TRUE;
1964 if ( maxthreads < nthreads ) {
1965
1966 if ( *threads_data_p != NULL ) {
1967 kmp_thread_data_t *old_data = *threads_data_p;
1968 kmp_thread_data_t *new_data = NULL;
1969
1970 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1971 "threads data for task_team %p, new_size = %d, old_size = %d\n",
1972 __kmp_gtid_from_thread( thread ), task_team,
1973 nthreads, maxthreads ) );
1974 // Reallocate threads_data to have more elements than current array
1975 // Cannot use __kmp_thread_realloc() because threads not around for
1976 // kmp_reap_task_team( ). Note all new array entries are initialized
1977 // to zero by __kmp_allocate().
1978 new_data = (kmp_thread_data_t *)
1979 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1980 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001981 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
1982 (void *) old_data,
1983                          maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001984
1985#ifdef BUILD_TIED_TASK_STACK
1986 // GEH: Figure out if this is the right thing to do
1987 for (i = maxthreads; i < nthreads; i++) {
1988 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1989 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1990 }
1991#endif // BUILD_TIED_TASK_STACK
1992 // Install the new data and free the old data
1993 (*threads_data_p) = new_data;
1994 __kmp_free( old_data );
1995 }
1996 else {
1997 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
1998 "threads data for task_team %p, size = %d\n",
1999 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2000 // Make the initial allocate for threads_data array, and zero entries
2001 // Cannot use __kmp_thread_calloc() because threads not around for
2002 // kmp_reap_task_team( ).
2003 *threads_data_p = (kmp_thread_data_t *)
2004 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2005#ifdef BUILD_TIED_TASK_STACK
2006 // GEH: Figure out if this is the right thing to do
2007 for (i = 0; i < nthreads; i++) {
2008 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2009 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2010 }
2011#endif // BUILD_TIED_TASK_STACK
2012 }
2013 task_team -> tt.tt_max_threads = nthreads;
2014 }
2015 else {
2016 // If array has (more than) enough elements, go ahead and use it
2017 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2018 }
2019
2020 // initialize threads_data pointers back to thread_info structures
2021 for (i = 0; i < nthreads; i++) {
2022 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2023 thread_data -> td.td_thr = team -> t.t_threads[i];
2024
2025 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2026 // The last stolen field survives across teams / barrier, and the number
2027 // of threads may have changed. It's possible (likely?) that a new
2028 // parallel region will exhibit the same behavior as the previous region.
2029 thread_data -> td.td_deque_last_stolen = -1;
2030 }
2031 }
2032
2033 KMP_MB();
2034 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2035 }
2036
2037 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2038 return is_init_thread;
2039}
2040
2041
2042//------------------------------------------------------------------------------
2043// __kmp_free_task_threads_data:
2044// Deallocates a threads_data array for a task team, including any attached
2045// tasking deques. Only occurs at library shutdown.
2046
2047static void
2048__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2049{
2050 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2051 if ( task_team -> tt.tt_threads_data != NULL ) {
2052 int i;
2053 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2054 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2055 }
2056 __kmp_free( task_team -> tt.tt_threads_data );
2057 task_team -> tt.tt_threads_data = NULL;
2058 }
2059 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2060}
2061
2062
2063//------------------------------------------------------------------------------
2064// __kmp_allocate_task_team:
2065// Allocates a task team associated with a specific team, taking it from
2066// the global task team free list if possible. Also initializes data structures.
2067
2068static kmp_task_team_t *
2069__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2070{
2071 kmp_task_team_t *task_team = NULL;
2072 int nthreads;
2073
2074 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2075 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2076
2077 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2078 // Take a task team from the task team pool
2079 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2080 if (__kmp_free_task_teams != NULL) {
2081 task_team = __kmp_free_task_teams;
2082 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2083 task_team -> tt.tt_next = NULL;
2084 }
2085 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2086 }
2087
2088 if (task_team == NULL) {
2089 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2090 "task team for team %p\n",
2091 __kmp_gtid_from_thread( thread ), team ) );
2092 // Allocate a new task team if one is not available.
2093 // Cannot use __kmp_thread_malloc() because threads not around for
2094 // kmp_reap_task_team( ).
2095 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2096 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2097 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2098 //task_team -> tt.tt_max_threads = 0;
2099 //task_team -> tt.tt_next = NULL;
2100 }
2101
2102 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2103 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2104
Jim Cownie5e8470a2013-09-27 10:38:44 +00002105 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2106 TCW_4( task_team -> tt.tt_active, TRUE );
2107 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2108
2109 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2110 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2111 return task_team;
2112}
2113
2114
2115//------------------------------------------------------------------------------
2116// __kmp_free_task_team:
2117// Frees the task team associated with a specific thread, and adds it
2118// to the global task team free list.
2119//
2120
2121static void
2122__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2123{
2124 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2125 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2126
2127 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2128
2129 // Put task team back on free list
2130 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2131
2132 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2133 task_team -> tt.tt_next = __kmp_free_task_teams;
2134 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2135 TCW_PTR(__kmp_free_task_teams, task_team);
2136
2137 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2138}
2139
2140
2141//------------------------------------------------------------------------------
2142// __kmp_reap_task_teams:
2143// Free all the task teams on the task team free list.
2144// Should only be done during library shutdown.
2145// Cannot do anything that needs a thread structure or gtid since they are already gone.
2146
2147void
2148__kmp_reap_task_teams( void )
2149{
2150 kmp_task_team_t *task_team;
2151
2152 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2153 // Free all task_teams on the free list
2154 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2155 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2156 __kmp_free_task_teams = task_team -> tt.tt_next;
2157 task_team -> tt.tt_next = NULL;
2158
2159 // Free threads_data if necessary
2160 if ( task_team -> tt.tt_threads_data != NULL ) {
2161 __kmp_free_task_threads_data( task_team );
2162 }
2163 __kmp_free( task_team );
2164 }
2165 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2166 }
2167}
2168
2169
2170//------------------------------------------------------------------------------
2171// __kmp_unref_task_team:
2172// Removes one thread from referencing the task team structure by
2173// decreasing the reference count, and deallocates the task team if there are
2174// no more references to it.
2175//
2176void
2177__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2178{
2179 kmp_uint ref_ct;
2180
2181 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2182
2183 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2184 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2185
2186
2187 if ( ref_ct == 0 ) {
2188 __kmp_free_task_team( thread, task_team );
2189 }
2190
2191 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2192}
2193
2194
2195//------------------------------------------------------------------------------
2196// __kmp_wait_to_unref_task_teams:
2197// Some threads could still be in the fork barrier release code, possibly
2198// trying to steal tasks. Wait for each thread to unreference its task team.
2199//
2200void
2201__kmp_wait_to_unref_task_teams(void)
2202{
2203 kmp_info_t *thread;
2204 kmp_uint32 spins;
2205 int done;
2206
2207 KMP_INIT_YIELD( spins );
2208
2209
2210 for (;;) {
2211 done = TRUE;
2212
2213        // TODO: GEH - this may be wrong because some sync would be necessary
2214 // in case threads are added to the pool during the traversal.
2215 // Need to verify that lock for thread pool is held when calling
2216 // this routine.
2217 for (thread = (kmp_info_t *)__kmp_thread_pool;
2218 thread != NULL;
2219 thread = thread->th.th_next_pool)
2220 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002221#if KMP_OS_WINDOWS
2222 DWORD exit_val;
2223#endif
2224 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2225 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2226 __kmp_gtid_from_thread( thread ) ) );
2227 continue;
2228 }
2229#if KMP_OS_WINDOWS
2230 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2231 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2232 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2233 __kmp_unref_task_team( thread->th.th_task_team, thread );
2234 }
2235 continue;
2236 }
2237#endif
2238
2239 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2240
2241 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2242 __kmp_gtid_from_thread( thread ) ) );
2243
2244 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002245 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002246 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002247 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002248 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2249 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002250 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002251 }
2252 }
2253 }
2254 if (done) {
2255 break;
2256 }
2257
2258 // If we are oversubscribed,
2259 // or have waited a bit (and library mode is throughput), yield.
2260 // Pause is in the following code.
2261 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2262 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2263 }
2264
2265
2266}
2267
2268
2269//------------------------------------------------------------------------------
2270// __kmp_task_team_setup: Create a task_team for the current team, but use
2271// an already created, unused one if it already exists.
2272// This may be called by any thread, but only for teams with # threads >1.
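//
// A note on the two task-team slots, as read from the code below (not a
// normative description): team->t.t_task_team[] holds two entries indexed by
// the thread's th_task_state (0 or 1).  __kmp_task_team_sync() toggles
// th_task_state at each barrier, so a task team for the upcoming region can
// be set up in one slot while threads may still be draining the task team in
// the other slot from the previous barrier.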
Jim Cownie5e8470a2013-09-27 10:38:44 +00002273void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002274__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002275{
2276 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2277
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002278 if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( team->t.t_nproc > 1 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002279 // Allocate a new task team, which will be propagated to
2280 // all of the worker threads after the barrier. As they
2281 // spin in the barrier release phase, then will continue
2282 // to use the previous task team struct, until they receive
2283 // the signal to stop checking for tasks (they can't safely
2284 // reference the kmp_team_t struct, which could be reallocated
2285 // by the master thread).
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002286 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2287 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2288 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie5e8470a2013-09-27 10:38:44 +00002289 ((team != NULL) ? team->t.t_id : -1)) );
2290 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002291 //else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292 // All threads have reported in, and no tasks were spawned
2293 // for this release->gather region. Leave the old task
2294 // team struct in place for the upcoming region. No task
2295 // teams are formed for serialized teams.
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002296 if (both) {
2297 int other_team = 1 - this_thr->th.th_task_state;
2298 if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2299 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2300 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2301 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2302 ((team != NULL) ? team->t.t_id : -1)) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002303 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002304 }
2305}
2306
2307
2308//------------------------------------------------------------------------------
2309// __kmp_task_team_sync: Propagation of task team data from team to threads
2310// which happens just after the release phase of a team barrier. This may be
2311// called by any thread, but only for teams with # threads > 1.
2312
2313void
2314__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2315{
2316 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2317
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002318 // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002319 if ( this_thr->th.th_task_team != NULL ) {
2320 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2321 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2322 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002323 } else { // We are re-using a task team that was never enabled.
2324 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002325 }
2326 }
2327
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002328 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002329 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002330 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2331 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002332 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2333 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2334 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2335}
2336
2337
2338//------------------------------------------------------------------------------
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002339// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
2340// barrier gather phase. Only called by master thread if #threads in team > 1 !
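//
// A sketch of the protocol, as read from this file: each worker decrements
// task_team->tt.tt_unfinished_threads in __kmp_execute_tasks_template() once
// it runs out of work, and the master blocks below on a kmp_flag_32 whose
// check value is 0U, i.e. it effectively waits until
//
//   TCR_4(task_team->tt.tt_unfinished_threads) == 0
//
// before deactivating and detaching the task team.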
Jim Cownie5e8470a2013-09-27 10:38:44 +00002341void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002342__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002343 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002344 )
2345{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002346 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347
2348 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2349 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2350
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002351 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2353 __kmp_gtid_from_thread( this_thr ), task_team ) );
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002354 // All worker threads might have dropped through to the release phase, but could still
2355 // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2356 // only the master thread checks for the termination condition.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002357 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2358 flag.wait(this_thr, TRUE
2359 USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002360
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002361 // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2362 // They will deallocate it when the reference count reaches zero.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363 // The master thread is not included in the ref count.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002364 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2365 __kmp_gtid_from_thread( this_thr ), task_team ) );
2366 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2367 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2368 KMP_MB();
2369
2370 TCW_PTR(this_thr->th.th_task_team, NULL);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002371 team->t.t_task_team[this_thr->th.th_task_state] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372 }
2373}
2374
2375
2376//------------------------------------------------------------------------------
2377// __kmp_tasking_barrier:
2378// Internal function to execute all tasks prior to a regular barrier or a
2379// join barrier. It is a full barrier itself, which unfortunately turns
2380// regular barriers into double barriers and join barriers into 1 1/2
2381// barriers.
2382// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2383
2384void
2385__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2386{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002387 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002388 int flag = FALSE;
2389 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2390
2391#if USE_ITT_BUILD
2392 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2393#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002394 kmp_flag_32 spin_flag(spin, 0U);
2395 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2396 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002397#if USE_ITT_BUILD
2398 // TODO: What about itt_sync_obj??
2399 KMP_FSYNC_SPIN_PREPARE( spin );
2400#endif /* USE_ITT_BUILD */
2401
2402 if( TCR_4(__kmp_global.g.g_done) ) {
2403 if( __kmp_global.g.g_abort )
2404 __kmp_abort_thread( );
2405 break;
2406 }
2407 KMP_YIELD( TRUE ); // GH: We always yield here
2408 }
2409#if USE_ITT_BUILD
2410 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2411#endif /* USE_ITT_BUILD */
2412}
2413