/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 * $Revision: 43389 $
 * $Date: 2014-08-11 10:54:01 -0500 (Mon, 11 Aug 2014) $
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"



/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
// __kmp_trace_task_stack: print the tied tasks from the task stack in order
// from top to bottom
//
// gtid: global thread identifier for thread containing stack
// thread_data: thread data for task team thread containing stack
// threshold: value above which the trace statement triggers
// location: string identifying call site of this function (for trace)

53static void
54__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
55{
56 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
57 kmp_taskdata_t **stack_top = task_stack -> ts_top;
58 kmp_int32 entries = task_stack -> ts_entries;
59 kmp_taskdata_t *tied_task;
60
61 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
62 "first_block = %p, stack_top = %p \n",
63 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
64
65 KMP_DEBUG_ASSERT( stack_top != NULL );
66 KMP_DEBUG_ASSERT( entries > 0 );
67
68 while ( entries != 0 )
69 {
70 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
71 // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
73 {
74 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
75
76 stack_block = stack_block -> sb_prev;
77 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
78 }
79
80 // finish bookkeeping
81 stack_top--;
82 entries--;
83
84 tied_task = * stack_top;
85
86 KMP_DEBUG_ASSERT( tied_task != NULL );
87 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
88
89 KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
90 "stack_top=%p, tied_task=%p\n",
91 location, gtid, entries, stack_top, tied_task ) );
92 }
93 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
94
95 KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
96 location, gtid ) );
97}
98
99//---------------------------------------------------------------------------
100// __kmp_init_task_stack: initialize the task stack for the first time
101// after a thread_data structure is created.
102// It should not be necessary to do this again (assuming the stack works).
103//
104// gtid: global thread identifier of calling thread
105// thread_data: thread data for task team thread containing stack
106
107static void
108__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
109{
110 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
111 kmp_stack_block_t *first_block;
112
113 // set up the first block of the stack
114 first_block = & task_stack -> ts_first_block;
115 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
116 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
117
118 // initialize the stack to be empty
119 task_stack -> ts_entries = TASK_STACK_EMPTY;
120 first_block -> sb_next = NULL;
121 first_block -> sb_prev = NULL;
122}
123
124
125//---------------------------------------------------------------------------
126// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
127//
128// gtid: global thread identifier for calling thread
129// thread_data: thread info for thread containing stack
130
131static void
132__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
133{
134 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
135 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
136
137 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
138 // free from the second block of the stack
139 while ( stack_block != NULL ) {
140 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
141
142 stack_block -> sb_next = NULL;
143 stack_block -> sb_prev = NULL;
144 if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
146 }
147 stack_block = next_block;
148 }
149 // initialize the stack to be empty
150 task_stack -> ts_entries = 0;
151 task_stack -> ts_top = NULL;
152}
153
154
155//---------------------------------------------------------------------------
156// __kmp_push_task_stack: Push the tied task onto the task stack.
157// Grow the stack if necessary by allocating another block.
158//
159// gtid: global thread identifier for calling thread
160// thread: thread info for thread containing stack
161// tied_task: the task to push on the stack
162
163static void
164__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
165{
166 // GEH - need to consider what to do if tt_threads_data not allocated yet
167 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
168 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
169 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
170
171 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
172 return; // Don't push anything on stack if team or team tasks are serialized
173 }
174
175 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
176 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
177
178 KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
179 gtid, thread, tied_task ) );
180 // Store entry
181 * (task_stack -> ts_top) = tied_task;
182
183 // Do bookkeeping for next push
184 task_stack -> ts_top++;
185 task_stack -> ts_entries++;
186
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
188 {
189 // Find beginning of this task block
190 kmp_stack_block_t *stack_block =
191 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
192
193 // Check if we already have a block
194 if ( stack_block -> sb_next != NULL )
195 { // reset ts_top to beginning of next block
196 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
197 }
198 else
199 { // Alloc new block and link it up
200 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
201 __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
202
203 task_stack -> ts_top = & new_block -> sb_block[0];
204 stack_block -> sb_next = new_block;
205 new_block -> sb_prev = stack_block;
206 new_block -> sb_next = NULL;
207
208 KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
209 gtid, tied_task, new_block ) );
210 }
211 }
212 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
213}
214
215//---------------------------------------------------------------------------
216// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
217// the task, just check to make sure it matches the ending task passed in.
218//
219// gtid: global thread identifier for the calling thread
220// thread: thread info structure containing stack
221// tied_task: the task popped off the stack
222// ending_task: the task that is ending (should match popped task)
223
224static void
225__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
226{
227 // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
229 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
230 kmp_taskdata_t *tied_task;
231
232 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
233 return; // Don't pop anything from stack if team or team tasks are serialized
234 }
235
236 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
237 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
238
239 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
240
241 // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
243 {
244 kmp_stack_block_t *stack_block =
245 (kmp_stack_block_t *) (task_stack -> ts_top) ;
246
247 stack_block = stack_block -> sb_prev;
248 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
249 }
250
251 // finish bookkeeping
252 task_stack -> ts_top--;
253 task_stack -> ts_entries--;
254
255 tied_task = * (task_stack -> ts_top );
256
257 KMP_DEBUG_ASSERT( tied_task != NULL );
258 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
259 KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
260
261 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
262 return;
263}
264#endif /* BUILD_TIED_TASK_STACK */
265
266//---------------------------------------------------
267// __kmp_push_task: Add a task to the thread's deque
268
269static kmp_int32
270__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
271{
272 kmp_info_t * thread = __kmp_threads[ gtid ];
273 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
274 kmp_task_team_t * task_team = thread->th.th_task_team;
275 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
276 kmp_thread_data_t * thread_data;
277
278 KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
279
280 // The first check avoids building task_team thread data if serialized
281 if ( taskdata->td_flags.task_serial ) {
282 KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
283 gtid, taskdata ) );
284 return TASK_NOT_PUSHED;
285 }
286
287 // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
288 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
289 if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
290 __kmp_enable_tasking( task_team, thread );
291 }
292 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
293 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
294
295 // Find tasking deque specific to encountering thread
296 thread_data = & task_team -> tt.tt_threads_data[ tid ];
297
298 // No lock needed since only owner can allocate
299 if (thread_data -> td.td_deque == NULL ) {
300 __kmp_alloc_task_deque( thread, thread_data );
301 }
302
303 // Check if deque is full
304 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
305 {
306 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
307 gtid, taskdata ) );
308 return TASK_NOT_PUSHED;
309 }
310
311 // Lock the deque for the task push operation
312 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
313
314 // Must have room since no thread can add tasks but calling thread
315 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
316
317 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
318 // Wrap index.
319 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
320 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
321
322 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
323
324 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
325 "task=%p ntasks=%d head=%u tail=%u\n",
326 gtid, taskdata, thread_data->td.td_deque_ntasks,
327 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
328
329 return TASK_SUCCESSFULLY_PUSHED;
330}
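// The deque used above is a fixed-size ring buffer whose size is a power of two,
// so head/tail updates reduce to a single mask.  A minimal sketch of the wrap
// arithmetic (the concrete TASK_DEQUE_BITS value is only an example):
//
//   // TASK_DEQUE_SIZE == 1 << TASK_DEQUE_BITS, TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1
//   // e.g. with TASK_DEQUE_BITS == 8:  tail == 255  ->  ( 255 + 1 ) & 255 == 0
//   tail = ( tail + 1 ) & TASK_DEQUE_MASK;   // owner pushes at the tail
//   head = ( head + 1 ) & TASK_DEQUE_MASK;   // thieves consume from the head
//
// Because ntasks is checked against TASK_DEQUE_SIZE before the push, the tail
// can never overrun the head while live entries remain.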
331
332
333//-----------------------------------------------------------------------------------------
334// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
335// this_thr: thread structure to set current_task in.
336
337void
338__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
339{
340 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
341 "curtask_parent=%p\n",
342 0, this_thr, this_thr -> th.th_current_task,
343 this_thr -> th.th_current_task -> td_parent ) );
344
345 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
346
347 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
348 "curtask_parent=%p\n",
349 0, this_thr, this_thr -> th.th_current_task,
350 this_thr -> th.th_current_task -> td_parent ) );
351}
352
353
354//---------------------------------------------------------------------------------------
355// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
356// this_thr: thread structure to set up
357// team: team for implicit task data
358// tid: thread within team to set up
359
360void
361__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
362{
363 // current task of the thread is a parent of the new just created implicit tasks of new team
364 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
365 "parent_task=%p\n",
366 tid, this_thr, this_thr->th.th_current_task,
367 team->t.t_implicit_task_taskdata[tid].td_parent ) );
368
369 KMP_DEBUG_ASSERT (this_thr != NULL);
370
371 if( tid == 0 ) {
372 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
373 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
374 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
375 }
376 } else {
377 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
378 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
379 }
380
381 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
382 "parent_task=%p\n",
383 tid, this_thr, this_thr->th.th_current_task,
384 team->t.t_implicit_task_taskdata[tid].td_parent ) );
385}
386
387
388//----------------------------------------------------------------------
389// __kmp_task_start: bookkeeping for a task starting execution
390// GTID: global thread id of calling thread
391// task: task starting execution
392// current_task: task suspending
393
394static void
395__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
396{
397 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
398 kmp_info_t * thread = __kmp_threads[ gtid ];
399
400 KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
401 gtid, taskdata, current_task) );
402
403 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
404
405 // mark currently executing task as suspended
406 // TODO: GEH - make sure root team implicit task is initialized properly.
407 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
408 current_task -> td_flags.executing = 0;
409
410 // Add task to stack if tied
411#ifdef BUILD_TIED_TASK_STACK
412 if ( taskdata -> td_flags.tiedness == TASK_TIED )
413 {
414 __kmp_push_task_stack( gtid, thread, taskdata );
415 }
416#endif /* BUILD_TIED_TASK_STACK */
417
418 // mark starting task as executing and as current task
419 thread -> th.th_current_task = taskdata;
420
421 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
422 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
423 taskdata -> td_flags.started = 1;
424 taskdata -> td_flags.executing = 1;
425 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
426 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
427
428 // GEH TODO: shouldn't we pass some sort of location identifier here?
429 // APT: yes, we will pass location here.
430 // need to store current thread state (in a thread or taskdata structure)
431 // before setting work_state, otherwise wrong state is set after end of task
432
433 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
434 gtid, taskdata ) );
435
436 return;
437}
438
439
440//----------------------------------------------------------------------
441// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
442// loc_ref: source location information; points to beginning of task block.
443// gtid: global thread number.
444// task: task thunk for the started task.
445
446void
447__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
448{
449 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
450 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
451
452 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
453 gtid, loc_ref, taskdata, current_task ) );
454
455 taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
456 __kmp_task_start( gtid, task, current_task );
457
458 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
459 gtid, loc_ref, taskdata ) );
460
461 return;
462}
463
464#ifdef TASK_UNUSED
465//----------------------------------------------------------------------
466// __kmpc_omp_task_begin: report that a given task has started execution
467// NEVER GENERATED BY COMPILER, DEPRECATED!!!
468
469void
470__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
471{
472 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
473
474 KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
475 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
476
477 __kmp_task_start( gtid, task, current_task );
478
479 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
480 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
481
482 return;
483}
484#endif // TASK_UNUSED
485
486
487//-------------------------------------------------------------------------------------
488// __kmp_free_task: free the current task space and the space for shareds
489// gtid: Global thread ID of calling thread
490// taskdata: task to free
491// thread: thread data structure of caller
492
493static void
494__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
495{
496 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
497 gtid, taskdata) );
498
499 // Check to make sure all flags and counters have the correct values
500 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
501 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
502 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
503 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
504 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
505 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
506
507 taskdata->td_flags.freed = 1;
508 // deallocate the taskdata and shared variable blocks associated with this task
509 #if USE_FAST_MEMORY
510 __kmp_fast_free( thread, taskdata );
511 #else /* ! USE_FAST_MEMORY */
512 __kmp_thread_free( thread, taskdata );
513 #endif
514
515 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
516 gtid, taskdata) );
517}
518
519//-------------------------------------------------------------------------------------
520// __kmp_free_task_and_ancestors: free the current task and ancestors without children
521//
522// gtid: Global thread ID of calling thread
523// taskdata: task to free
524// thread: thread data structure of caller
525
526static void
527__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
528{
529 kmp_int32 children = 0;
530 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
531
532 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
533
534 if ( !team_or_tasking_serialized ) {
535 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
536 KMP_DEBUG_ASSERT( children >= 0 );
537 }
538
539 // Now, go up the ancestor tree to see if any ancestors can now be freed.
540 while ( children == 0 )
541 {
542 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
543
544 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
545 "and freeing itself\n", gtid, taskdata) );
546
547 // --- Deallocate my ancestor task ---
548 __kmp_free_task( gtid, taskdata, thread );
549
550 taskdata = parent_taskdata;
551
552 // Stop checking ancestors at implicit task or if tasking serialized
553 // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
554 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
555 return;
556
557 if ( !team_or_tasking_serialized ) {
558 // Predecrement simulated by "- 1" calculation
559 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
560 KMP_DEBUG_ASSERT( children >= 0 );
561 }
562 }
563
564 KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
565 "not freeing it yet\n", gtid, taskdata, children) );
566}
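// Worked example of the td_allocated_child_tasks accounting used above (an
// illustrative sketch, not a real trace; assumes an explicit, non-serialized
// parent): a task's count starts at 1 for the task itself and gains 1 for each
// child it allocates; it drops by 1 when the task itself finishes and by 1 as
// each child is freed, and only at zero is the storage released.
//
//   parent allocated              ->  parent count == 1
//   parent creates two children   ->  parent count == 3
//   parent finishes               ->  parent count == 2   (children still reference it)
//   first child freed             ->  parent count == 1
//   second child freed            ->  parent count == 0   -> parent freed by the loop above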
567
568//---------------------------------------------------------------------
569// __kmp_task_finish: bookkeeping to do when a task finishes execution
570// gtid: global thread ID for calling thread
571// task: task to be finished
572// resumed_task: task to be resumed. (may be NULL if task is serialized)
573
574static void
575__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
576{
577 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
578 kmp_info_t * thread = __kmp_threads[ gtid ];
579 kmp_int32 children = 0;
580
581 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
582 gtid, taskdata, resumed_task) );
583
584 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
585
586 // Pop task from stack if tied
587#ifdef BUILD_TIED_TASK_STACK
588 if ( taskdata -> td_flags.tiedness == TASK_TIED )
589 {
590 __kmp_pop_task_stack( gtid, thread, taskdata );
591 }
592#endif /* BUILD_TIED_TASK_STACK */
593
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag would be reset to 1 again by that same function.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the task's destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been
       released, hence overlapping the destructor invocations with some other work
       in the released tasks.  The OpenMP spec is not specific on when the
       destructors are invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED
634
    // bookkeeping for resuming task:
636 // GEH - note tasking_ser => task_serial
637 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
638 taskdata->td_flags.task_serial);
639 if ( taskdata->td_flags.task_serial )
640 {
641 if (resumed_task == NULL) {
642 resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
643 }
644 else {
645 // verify resumed task passed in points to parent
646 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
647 }
648 }
649 else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
651 }
652
653 // Free this task and then ancestor tasks if they have no children.
654 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
655
656 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
657
658 // TODO: GEH - make sure root team implicit task is initialized properly.
659 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
660 resumed_task->td_flags.executing = 1; // resume previous task
661
662 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
663 gtid, taskdata, resumed_task) );
664
665 return;
666}
667
668//---------------------------------------------------------------------
669// __kmpc_omp_task_complete_if0: report that a task has completed execution
670// loc_ref: source location information; points to end of task block.
671// gtid: global thread number.
672// task: task thunk for the completed task.
673
674void
675__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
676{
677 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
678 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
679
680 __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
681
682 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
683 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
684
685 return;
686}
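// Illustrative sketch (not part of the runtime): for an undeferred construct
// such as "#pragma omp task if(0)", a compiler typically brackets a direct
// call to the outlined routine with the two entry points above instead of
// queuing the task; outlined_fn, loc, sizeof_task and sizeof_shareds are
// placeholder names.
//
//   kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, 1 /* tied */,
//                                          sizeof_task, sizeof_shareds, &outlined_fn );
//   __kmpc_omp_task_begin_if0( &loc, gtid, t );    // mark t started and serialized
//   outlined_fn( gtid, t );                        // body runs immediately on this thread
//   __kmpc_omp_task_complete_if0( &loc, gtid, t ); // finish t and resume the parent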
687
688#ifdef TASK_UNUSED
689//---------------------------------------------------------------------
690// __kmpc_omp_task_complete: report that a task has completed execution
691// NEVER GENERATED BY COMPILER, DEPRECATED!!!
692
693void
694__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
695{
696 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
697 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
698
699 __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
700
701 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
702 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
703 return;
704}
705#endif // TASK_UNUSED
706
707
708//----------------------------------------------------------------------------------------------------
709// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
710//
711// loc_ref: reference to source location of parallel region
712// this_thr: thread data structure corresponding to implicit task
713// team: team for this_thr
714// tid: thread id of given thread within team
715// set_curr_task: TRUE if need to push current task to thread
716// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
717// TODO: Get better loc_ref. Value passed in may be NULL
718
719void
720__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
721{
722 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
723
724 KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
725 tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
726
727 task->td_task_id = KMP_GEN_TASK_ID();
728 task->td_team = team;
729// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
730 task->td_ident = loc_ref;
731 task->td_taskwait_ident = NULL;
732 task->td_taskwait_counter = 0;
733 task->td_taskwait_thread = 0;
734
735 task->td_flags.tiedness = TASK_TIED;
736 task->td_flags.tasktype = TASK_IMPLICIT;
737 // All implicit tasks are executed immediately, not deferred
738 task->td_flags.task_serial = 1;
739 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
740 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
741
742 task->td_flags.started = 1;
743 task->td_flags.executing = 1;
744 task->td_flags.complete = 0;
745 task->td_flags.freed = 0;
746
#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

752 if (set_curr_task) { // only do this initialization the first time a thread is created
753 task->td_incomplete_child_tasks = 0;
754 task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
755#if OMP_40_ENABLED
756 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
757#endif
758 __kmp_push_current_task_to_thread( this_thr, team, tid );
759 } else {
760 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
761 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
762 }
763
764 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
765 tid, team, task ) );
766}
767
// Round up a size to a multiple of val, where val is a power of two
769// Used to insert padding between structures co-allocated using a single malloc() call
770static size_t
771__kmp_round_up_to_val( size_t size, size_t val ) {
772 if ( size & ( val - 1 ) ) {
773 size &= ~ ( val - 1 );
774 if ( size <= KMP_SIZE_T_MAX - val ) {
775 size += val; // Round up if there is no overflow.
776 }; // if
777 }; // if
778 return size;
} // __kmp_round_up_to_val
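// For example, __kmp_round_up_to_val( 30, 8 ) clears the low bits (30 -> 24)
// and then adds 8, returning 32, while an already aligned size such as
// __kmp_round_up_to_val( 32, 8 ) is returned unchanged.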
780
781
782//---------------------------------------------------------------------------------
783// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
784//
785// loc_ref: source location information
786// gtid: global thread number.
787// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
788// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
789// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
790// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
791// task_entry: Pointer to task code entry point generated by compiler.
792// returns: a pointer to the allocated kmp_task_t structure (task).
793
794kmp_task_t *
795__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
796 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
797 kmp_routine_entry_t task_entry )
798{
799 kmp_task_t *task;
800 kmp_taskdata_t *taskdata;
801 kmp_info_t *thread = __kmp_threads[ gtid ];
802 kmp_team_t *team = thread->th.th_team;
803 kmp_taskdata_t *parent_task = thread->th.th_current_task;
804 size_t shareds_offset;
805
806 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
807 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
808 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
809 sizeof_shareds, task_entry) );
810
811 if ( parent_task->td_flags.final ) {
812 if (flags->merged_if0) {
813 }
814 flags->final = 1;
815 }
816
817 // Calculate shared structure offset including padding after kmp_task_t struct
818 // to align pointers in shared struct
819 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
820 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
821
822 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
823 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
824 gtid, shareds_offset) );
825 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
826 gtid, sizeof_shareds) );
827
828 // Avoid double allocation here by combining shareds with taskdata
829 #if USE_FAST_MEMORY
830 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
831 #else /* ! USE_FAST_MEMORY */
832 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
833 #endif /* USE_FAST_MEMORY */
834
835 task = KMP_TASKDATA_TO_TASK(taskdata);
836
837 // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
845 if (sizeof_shareds > 0) {
846 // Avoid double allocation here by combining shareds with taskdata
847 task->shareds = & ((char *) taskdata)[ shareds_offset ];
848 // Make sure shareds struct is aligned to pointer size
849 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
850 } else {
851 task->shareds = NULL;
852 }
853 task->routine = task_entry;
854 task->part_id = 0; // AC: Always start with 0 part id
855
856 taskdata->td_task_id = KMP_GEN_TASK_ID();
857 taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
860 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
861 taskdata->td_ident = loc_ref;
862 taskdata->td_taskwait_ident = NULL;
863 taskdata->td_taskwait_counter = 0;
864 taskdata->td_taskwait_thread = 0;
865 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
866 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
867
868 taskdata->td_flags.tiedness = flags->tiedness;
869 taskdata->td_flags.final = flags->final;
870 taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
    taskdata->td_flags.tasktype = TASK_EXPLICIT;
875
876 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
877 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
878
879 // GEH - TODO: fix this to copy parent task's value of team_serial flag
880 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
881
882 // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
883 // tasks are not left until program termination to execute. Also, it helps locality to execute
884 // immediately.
885 taskdata->td_flags.task_serial = ( taskdata->td_flags.final
886 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
887
888 taskdata->td_flags.started = 0;
889 taskdata->td_flags.executing = 0;
890 taskdata->td_flags.complete = 0;
891 taskdata->td_flags.freed = 0;
892
893 taskdata->td_flags.native = flags->native;
894
895 taskdata->td_incomplete_child_tasks = 0;
896 taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
897#if OMP_40_ENABLED
898 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
899 taskdata->td_dephash = NULL;
900 taskdata->td_depnode = NULL;
#endif
    // Only need to keep track of child task counts if team parallel and tasking not serialized
903 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
904 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
905#if OMP_40_ENABLED
906 if ( parent_task->td_taskgroup )
907 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
908#endif
909 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
910 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
911 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
912 }
913 }
914
915 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
916 gtid, taskdata, taskdata->td_parent) );
917
918 return task;
919}
920
921
922kmp_task_t *
923__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
924 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
925 kmp_routine_entry_t task_entry )
926{
927 kmp_task_t *retval;
928 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
929
930 input_flags->native = FALSE;
931 // __kmp_task_alloc() sets up all other runtime flags
932
933 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
934 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
935 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
936 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
937
938 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
939 sizeof_shareds, task_entry );
940
941 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
942
943 return retval;
944}
945
946//-----------------------------------------------------------
947// __kmp_invoke_task: invoke the specified task
948//
949// gtid: global thread ID of caller
950// task: the task to invoke
// current_task: the task to resume after task invocation
952
953static void
954__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
955{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );

    __kmp_task_start( gtid, task, current_task );

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED

998 __kmp_task_finish( gtid, task, current_task );
999
1000 KA_TRACE(30, ("__kmp_inovke_task(exit): T#%d completed task %p, resuming task %p\n",
1001 gtid, taskdata, current_task) );
1002 return;
1003}
1004
1005//-----------------------------------------------------------------------
1006// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1007//
1008// loc_ref: location of original task pragma (ignored)
1009// gtid: Global Thread ID of encountering thread
1010// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1011// Returns:
1012// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1013// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1014
1015kmp_int32
1016__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1017{
1018 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1019
1020 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1021 gtid, loc_ref, new_taskdata ) );
1022
1023 /* Should we execute the new task or queue it? For now, let's just always try to
1024 queue it. If the queue fills up, then we'll execute it. */
1025
1026 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1027 { // Execute this task immediately
1028 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1029 new_taskdata->td_flags.task_serial = 1;
1030 __kmp_invoke_task( gtid, new_task, current_task );
1031 }
1032
1033 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1034 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1035 new_taskdata ) );
1036
1037 return TASK_CURRENT_NOT_QUEUED;
1038}
1039
//---------------------------------------------------------------------
1041// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1042// gtid: Global Thread ID of encountering thread
1043// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1044// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1045// returns:
1046//
1047// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1048// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1049kmp_int32
1050__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1051{
1052 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1053
1054 /* Should we execute the new task or queue it? For now, let's just always try to
1055 queue it. If the queue fills up, then we'll execute it. */
1056
1057 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1058 { // Execute this task immediately
1059 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1060 if ( serialize_immediate )
1061 new_taskdata -> td_flags.task_serial = 1;
1062 __kmp_invoke_task( gtid, new_task, current_task );
1063 }
1064
1065
1066 return TASK_CURRENT_NOT_QUEUED;
1067}

//---------------------------------------------------------------------
// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
// the parent thread only!
// loc_ref: location of original task pragma (ignored)
1073// gtid: Global Thread ID of encountering thread
1074// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1075// returns:
1076//
1077// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1078// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1079
1080kmp_int32
1081__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1082{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
    kmp_int32 res;

    KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    res = __kmp_omp_task(gtid,new_task,true);

    KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );
    return res;
}
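// Illustrative sketch (not part of the runtime): a deferrable "#pragma omp task"
// is typically lowered by the compiler into an allocation of the task thunk
// followed by a call to __kmpc_omp_task(); outlined_fn, loc and my_var are
// placeholder names.
//
//   kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, 1 /* tied */,
//                                          sizeof(kmp_task_t) /* + privates */,
//                                          sizeof(void *) /* shareds */, &outlined_fn );
//   ((void **)t->shareds)[0] = &my_var;  // publish the address of a shared variable
//   __kmpc_omp_task( &loc, gtid, t );    // queue it, or run it at once if it cannot be deferred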
1095
//-------------------------------------------------------------------------------------
1097// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1098
1099kmp_int32
1100__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1101{
1102 kmp_taskdata_t * taskdata;
1103 kmp_info_t * thread;
1104 int thread_finished = FALSE;
1105
1106 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1107 gtid, loc_ref) );
1108
1109 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1110 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1111
1112 thread = __kmp_threads[ gtid ];
1113 taskdata = thread -> th.th_current_task;
1114#if USE_ITT_BUILD
1115 // Note: These values are used by ITT events as well.
1116#endif /* USE_ITT_BUILD */
1117 taskdata->td_taskwait_counter += 1;
1118 taskdata->td_taskwait_ident = loc_ref;
1119 taskdata->td_taskwait_thread = gtid + 1;
1120
1121#if USE_ITT_BUILD
1122 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1123 if ( itt_sync_obj != NULL )
1124 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1125#endif /* USE_ITT_BUILD */
1126
        if ( ! taskdata->td_flags.team_serial ) {
            // GEH: if team serialized, avoid reading the volatile variable below.
            kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
            while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }
1135#if USE_ITT_BUILD
1136 if ( itt_sync_obj != NULL )
1137 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1138#endif /* USE_ITT_BUILD */
1139
1140 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1141 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1142 }
1143
1144 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1145 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1146
1147 return TASK_CURRENT_NOT_QUEUED;
1148}
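// Illustrative sketch (not part of the runtime): "#pragma omp taskwait" is
// typically lowered to a single call,
//
//   __kmpc_omp_taskwait( &loc, __kmpc_global_thread_num( &loc ) );
//
// and, as implemented above, the calling thread keeps executing queued tasks
// until its own td_incomplete_child_tasks counter drains to zero.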
1149
1150
1151//-------------------------------------------------
1152// __kmpc_omp_taskyield: switch to a different task
1153
1154kmp_int32
1155__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1156{
1157 kmp_taskdata_t * taskdata;
1158 kmp_info_t * thread;
1159 int thread_finished = FALSE;
1160
1161 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1162 gtid, loc_ref, end_part) );
1163
    if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
        // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1166
1167 thread = __kmp_threads[ gtid ];
1168 taskdata = thread -> th.th_current_task;
1169 // Should we model this as a task wait or not?
1170#if USE_ITT_BUILD
1171 // Note: These values are used by ITT events as well.
1172#endif /* USE_ITT_BUILD */
1173 taskdata->td_taskwait_counter += 1;
1174 taskdata->td_taskwait_ident = loc_ref;
1175 taskdata->td_taskwait_thread = gtid + 1;
1176
1177#if USE_ITT_BUILD
1178 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1179 if ( itt_sync_obj != NULL )
1180 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1181#endif /* USE_ITT_BUILD */
        if ( ! taskdata->td_flags.team_serial ) {
            kmp_task_team_t * task_team = thread->th.th_task_team;
            if (task_team != NULL) {
                if (KMP_TASKING_ENABLED(task_team, thread->th.th_task_state)) {
                    __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
                                            USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
                }
            }
        }
#if USE_ITT_BUILD
1192 if ( itt_sync_obj != NULL )
1193 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1194#endif /* USE_ITT_BUILD */
1195
1196 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1197 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1198 }
1199
1200 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1201 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1202
1203 return TASK_CURRENT_NOT_QUEUED;
1204}
1205
1206
1207#if OMP_40_ENABLED
1208//-------------------------------------------------------------------------------------
1209// __kmpc_taskgroup: Start a new taskgroup
1210
1211void
__kmpc_taskgroup( ident_t* loc, int gtid )
{
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * tg_new =
        (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
    KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
    tg_new->count = 0;
    tg_new->cancel_request = cancel_noreq;
    tg_new->parent = taskdata->td_taskgroup;
    taskdata->td_taskgroup = tg_new;
}
1224
1225
1226//-------------------------------------------------------------------------------------
1227// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1228// and its descendants are complete
1229
1230void
__kmpc_end_taskgroup( ident_t* loc, int gtid )
{
1233 kmp_info_t * thread = __kmp_threads[ gtid ];
1234 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1235 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1236 int thread_finished = FALSE;
1237
1238 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1239 KMP_DEBUG_ASSERT( taskgroup != NULL );
1240
1241 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1242#if USE_ITT_BUILD
1243 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1244 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1245 if ( itt_sync_obj != NULL )
1246 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1247#endif /* USE_ITT_BUILD */
1248
        if ( ! taskdata->td_flags.team_serial ) {
            kmp_flag_32 flag(&(taskgroup->count), 0U);
            while ( TCR_4(taskgroup->count) != 0 ) {
                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
            }
        }
1256
1257#if USE_ITT_BUILD
1258 if ( itt_sync_obj != NULL )
1259 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1260#endif /* USE_ITT_BUILD */
1261 }
1262 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1263
1264 // Restore parent taskgroup for the current task
1265 taskdata->td_taskgroup = taskgroup->parent;
1266 __kmp_thread_free( thread, taskgroup );
1267
1268 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1269}
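// Illustrative sketch (not part of the runtime): a compiler typically brackets
// the body of "#pragma omp taskgroup" with the two entry points above,
//
//   __kmpc_taskgroup( &loc, gtid );
//   ... create tasks; each inherits td_taskgroup and increments its count ...
//   __kmpc_end_taskgroup( &loc, gtid );   // waits here until the group count is 0
//
// so the wait covers the tasks created in the region and their descendants.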
1270#endif
1271
1272
1273//------------------------------------------------------
1274// __kmp_remove_my_task: remove a task from my own deque
1275
1276static kmp_task_t *
1277__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1278 kmp_int32 is_constrained )
1279{
1280 kmp_task_t * task;
1281 kmp_taskdata_t * taskdata;
1282 kmp_thread_data_t *thread_data;
1283 kmp_uint32 tail;
1284
1285 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1286 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1287
1288 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1289
1290 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1291 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1292 thread_data->td.td_deque_tail) );
1293
1294 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1295 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1296 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1297 thread_data->td.td_deque_tail) );
1298 return NULL;
1299 }
1300
1301 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1302
1303 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1304 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1305 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1306 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1307 thread_data->td.td_deque_tail) );
1308 return NULL;
1309 }
1310
1311 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1312 taskdata = thread_data -> td.td_deque[ tail ];
1313
1314 if (is_constrained) {
1315 // we need to check if the candidate obeys task scheduling constraint:
1316 // only child of current task can be scheduled
1317 kmp_taskdata_t * current = thread->th.th_current_task;
1318 kmp_int32 level = current->td_level;
1319 kmp_taskdata_t * parent = taskdata->td_parent;
1320 while ( parent != current && parent->td_level > level ) {
1321 parent = parent->td_parent; // check generation up to the level of the current task
1322 KMP_DEBUG_ASSERT(parent != NULL);
1323 }
1324 if ( parent != current ) {
            // If the tail task is not a child, then no other children can appear in the deque.
1326 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1327 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1328 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1329 thread_data->td.td_deque_tail) );
1330 return NULL;
1331 }
1332 }
1333
1334 thread_data -> td.td_deque_tail = tail;
1335 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1336
1337 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1338
1339    KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1340 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1341 thread_data->td.td_deque_tail) );
1342
1343 task = KMP_TASKDATA_TO_TASK( taskdata );
1344 return task;
1345}
1346
1347
1348//-----------------------------------------------------------
1349// __kmp_steal_task: remove a task from another thread's deque
1350// Assumes that the calling thread has already checked for the existence of
1351// the task_team's thread_data before calling this routine.
1352
1353static kmp_task_t *
1354__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1355 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1356 kmp_int32 is_constrained )
1357{
1358 kmp_task_t * task;
1359 kmp_taskdata_t * taskdata;
1360 kmp_thread_data_t *victim_td, *threads_data;
1361 kmp_int32 victim_tid, thread_tid;
1362
1363 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1364
1365 threads_data = task_team -> tt.tt_threads_data;
1366 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1367
1368 victim_tid = victim->th.th_info.ds.ds_tid;
1369 victim_td = & threads_data[ victim_tid ];
1370
1371 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1372 "head=%u tail=%u\n",
1373 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1374 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1375
1376 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1377 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1378 {
1379 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1380 "ntasks=%d head=%u tail=%u\n",
1381 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1382 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1383 return NULL;
1384 }
1385
1386 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1387
1388 // Check again after we acquire the lock
1389 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1390 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1391 {
1392 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1393 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1394 "ntasks=%d head=%u tail=%u\n",
1395 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1396 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1397 return NULL;
1398 }
1399
1400 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1401
1402 if ( !is_constrained ) {
1403 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1404 // Bump head pointer and Wrap.
1405 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1406 } else {
1407        // When stealing is constrained, steal from the tail of the deque (smaller tasks)
1408 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1409 taskdata = victim_td -> td.td_deque[ tail ];
1410 // we need to check if the candidate obeys task scheduling constraint:
1411 // only child of current task can be scheduled
1412 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1413 kmp_int32 level = current->td_level;
1414 kmp_taskdata_t * parent = taskdata->td_parent;
1415 while ( parent != current && parent->td_level > level ) {
1416 parent = parent->td_parent; // check generation up to the level of the current task
1417 KMP_DEBUG_ASSERT(parent != NULL);
1418 }
1419 if ( parent != current ) {
1420            // If the tail task is not a child, then no other children can appear in the deque (?).
1421 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1422 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1423 "ntasks=%d head=%u tail=%u\n",
1424 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1425 task_team, victim_td->td.td_deque_ntasks,
1426 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1427 return NULL;
1428 }
1429 victim_td -> td.td_deque_tail = tail;
1430 }
1431 if (*thread_finished) {
1432 // We need to un-mark this victim as a finished victim. This must be done before
1433 // releasing the lock, or else other threads (starting with the master victim)
1434 // might be prematurely released from the barrier!!!
1435 kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1436
1437 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1438 gtid, count + 1, task_team) );
1439
1440 *thread_finished = FALSE;
1441 }
1442 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1443
1444 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1445
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001446 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001447 "ntasks=%d head=%u tail=%u\n",
1448 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1449 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1450 victim_td->td.td_deque_tail) );
1451
1452 task = KMP_TASKDATA_TO_TASK( taskdata );
1453 return task;
1454}
1455
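//-----------------------------------------------------------------------------
// Illustrative sketch (not compiled into the library): the task scheduling
// constraint check that __kmp_remove_my_task() and __kmp_steal_task() perform
// inline above, written as a hypothetical standalone helper.  The helper name
// is made up; it relies only on the td_parent / td_level fields used above.
#if 0
static int
__kmp_task_is_descendant_sketch( kmp_taskdata_t *candidate, kmp_taskdata_t *current )
{
    // Walk up the candidate's ancestry until we either reach the current task
    // or climb above its nesting level; under the constraint, only descendants
    // of the currently executing task may be scheduled.
    kmp_taskdata_t *parent = candidate->td_parent;
    while ( parent != current && parent->td_level > current->td_level ) {
        parent = parent->td_parent;            // check one generation up
        KMP_DEBUG_ASSERT( parent != NULL );
    }
    return ( parent == current );              // TRUE iff candidate may be scheduled
}
#endif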
1456
1457//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001458// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001459// is satisfied (return true) or there are none left (return false).
1460// final_spin is TRUE if this is the spin at the release barrier.
1461// thread_finished indicates whether the thread is finished executing all
1462// the tasks it has on its deque, and is at the release barrier.
1463// flag is the flag object (spin location plus the value to check) on which to spin.
1464// flag == NULL means only execute a single task and return.
1465// The flag's done_check() determines when the spin terminates.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001466template <class C>
1467static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1468 int *thread_finished
1469 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001470{
1471 kmp_task_team_t * task_team;
1472 kmp_team_t * team;
1473 kmp_thread_data_t * threads_data;
1474 kmp_task_t * task;
1475 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1476 volatile kmp_uint32 * unfinished_threads;
1477 kmp_int32 nthreads, last_stolen, k, tid;
1478
1479 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1480 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1481
1482 task_team = thread -> th.th_task_team;
1483 KMP_DEBUG_ASSERT( task_team != NULL );
1484
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001485 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001486 gtid, final_spin, *thread_finished) );
1487
1488 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1489 KMP_DEBUG_ASSERT( threads_data != NULL );
1490
1491 nthreads = task_team -> tt.tt_nproc;
1492 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1493 KMP_DEBUG_ASSERT( nthreads > 1 );
1494 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1495
1496 // Choose tasks from our own work queue.
1497 start:
1498 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1499#if USE_ITT_BUILD && USE_ITT_NOTIFY
1500 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1501 if ( itt_sync_obj == NULL ) {
1502 // we are at fork barrier where we could not get the object reliably
1503 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1504 }
1505 __kmp_itt_task_starting( itt_sync_obj );
1506 }
1507#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1508 __kmp_invoke_task( gtid, task, current_task );
1509#if USE_ITT_BUILD
1510 if ( itt_sync_obj != NULL )
1511 __kmp_itt_task_finished( itt_sync_obj );
1512#endif /* USE_ITT_BUILD */
1513
1514 // If this thread is only partway through the barrier and the condition
1515 // is met, then return now, so that the barrier gather/release pattern can proceed.
1516 // If this thread is in the last spin loop in the barrier, waiting to be
1517 // released, we know that the termination condition will not be satisified,
1518 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001519 if (flag == NULL || (!final_spin && flag->done_check())) {
1520 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001521 return TRUE;
1522 }
1523 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1524 }
1525
1526 // This thread's work queue is empty. If we are in the final spin loop
1527 // of the barrier, check and see if the termination condition is satisfied.
1528 if (final_spin) {
1529 // First, decrement the #unfinished threads, if that has not already
1530 // been done. This decrement might be to the spin location, and
1531 // result in the termination condition being satisfied.
1532 if (! *thread_finished) {
1533 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001534 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001535 gtid, count, task_team) );
1536 *thread_finished = TRUE;
1537 }
1538
1539 // It is now unsafe to reference thread->th.th_team !!!
1540 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1541 // thread to pass through the barrier, where it might reset each thread's
1542 // th.th_team field for the next parallel region.
1543 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001544 if (flag != NULL && flag->done_check()) {
1545 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001546 return TRUE;
1547 }
1548 }
1549
1550 // Try to steal from the last place I stole from successfully.
1551 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1552 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1553
1554 if (last_stolen != -1) {
1555 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1556
1557 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1558 thread_finished, is_constrained )) != NULL)
1559 {
1560#if USE_ITT_BUILD && USE_ITT_NOTIFY
1561 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1562 if ( itt_sync_obj == NULL ) {
1563 // we are at fork barrier where we could not get the object reliably
1564 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1565 }
1566 __kmp_itt_task_starting( itt_sync_obj );
1567 }
1568#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1569 __kmp_invoke_task( gtid, task, current_task );
1570#if USE_ITT_BUILD
1571 if ( itt_sync_obj != NULL )
1572 __kmp_itt_task_finished( itt_sync_obj );
1573#endif /* USE_ITT_BUILD */
1574
1575 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001576 if (flag == NULL || (!final_spin && flag->done_check())) {
1577 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001578 gtid) );
1579 return TRUE;
1580 }
1581
1582 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1583 // If the execution of the stolen task resulted in more tasks being
1584 // placed on our run queue, then restart the whole process.
1585 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001586 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587 gtid) );
1588 goto start;
1589 }
1590 }
1591
1592 // Don't give priority to stealing from this thread anymore.
1593 threads_data[ tid ].td.td_deque_last_stolen = -1;
1594
1595    // The victim's work queue is empty. If we are in the final spin loop
1596 // of the barrier, check and see if the termination condition is satisfied.
1597 if (final_spin) {
1598 // First, decrement the #unfinished threads, if that has not already
1599 // been done. This decrement might be to the spin location, and
1600 // result in the termination condition being satisfied.
1601 if (! *thread_finished) {
1602 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001603 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001604 "task_team=%p\n", gtid, count, task_team) );
1605 *thread_finished = TRUE;
1606 }
1607
1608 // If __kmp_tasking_mode != tskm_immediate_exec
1609 // then it is now unsafe to reference thread->th.th_team !!!
1610 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1611 // thread to pass through the barrier, where it might reset each thread's
1612 // th.th_team field for the next parallel region.
1613 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001614 if (flag != NULL && flag->done_check()) {
1615 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001616 gtid) );
1617 return TRUE;
1618 }
1619 }
1620 }
1621
1622 // Find a different thread to steal work from. Pick a random thread.
1623 // My initial plan was to cycle through all the threads, and only return
1624 // if we tried to steal from every thread, and failed. Arch says that's
1625 // not such a great idea.
1626 // GEH - need yield code in this loop for throughput library mode?
1627 new_victim:
1628 k = __kmp_get_random( thread ) % (nthreads - 1);
1629 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1630 ++k; // Adjusts random distribution to exclude self
1631 }
1632 {
1633 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1634 int first;
1635
1636 // There is a slight chance that __kmp_enable_tasking() did not wake up
1637        // all threads waiting at the barrier.  If this thread is sleeping,
1638        // then wake it up.  Since we were going to pay the cache miss penalty
1639        // for referencing another thread's kmp_info_t struct anyway, the check
1640 // shouldn't cost too much performance at this point.
1641 // In extra barrier mode, tasks do not sleep at the separate tasking
1642 // barrier, so this isn't a problem.
1643 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1644 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1645 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1646 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001647 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001648            // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001649            // There is a slight possibility that it resumes, steals a task from
Jim Cownie5e8470a2013-09-27 10:38:44 +00001650            // another thread, which spawns more tasks, all in the time that it takes
1651 // this thread to check => don't write an assertion that the victim's
1652 // queue is empty. Try stealing from a different thread.
1653 goto new_victim;
1654 }
1655
1656 // Now try to steal work from the selected thread
1657 first = TRUE;
1658 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1659 thread_finished, is_constrained )) != NULL)
1660 {
1661#if USE_ITT_BUILD && USE_ITT_NOTIFY
1662 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1663 if ( itt_sync_obj == NULL ) {
1664 // we are at fork barrier where we could not get the object reliably
1665 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1666 }
1667 __kmp_itt_task_starting( itt_sync_obj );
1668 }
1669#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1670 __kmp_invoke_task( gtid, task, current_task );
1671#if USE_ITT_BUILD
1672 if ( itt_sync_obj != NULL )
1673 __kmp_itt_task_finished( itt_sync_obj );
1674#endif /* USE_ITT_BUILD */
1675
1676 // Try stealing from this victim again, in the future.
1677 if (first) {
1678 threads_data[ tid ].td.td_deque_last_stolen = k;
1679 first = FALSE;
1680 }
1681
1682 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001683 if (flag == NULL || (!final_spin && flag->done_check())) {
1684 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001685 gtid) );
1686 return TRUE;
1687 }
1688 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1689
1690 // If the execution of the stolen task resulted in more tasks being
1691 // placed on our run queue, then restart the whole process.
1692 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001693 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001694 gtid) );
1695 goto start;
1696 }
1697 }
1698
1699        // The victim's work queue is empty. If we are in the final spin loop
1700 // of the barrier, check and see if the termination condition is satisfied.
1701 // Going on and finding a new victim to steal from is expensive, as it
1702 // involves a lot of cache misses, so we definitely want to re-check the
1703 // termination condition before doing that.
1704 if (final_spin) {
1705 // First, decrement the #unfinished threads, if that has not already
1706 // been done. This decrement might be to the spin location, and
1707 // result in the termination condition being satisfied.
1708 if (! *thread_finished) {
1709 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001710 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711 "task_team=%p\n",
1712 gtid, count, task_team) );
1713 *thread_finished = TRUE;
1714 }
1715
1716 // If __kmp_tasking_mode != tskm_immediate_exec,
1717 // then it is now unsafe to reference thread->th.th_team !!!
1718 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1719 // thread to pass through the barrier, where it might reset each thread's
1720 // th.th_team field for the next parallel region.
1721 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001722 if (flag != NULL && flag->done_check()) {
1723 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001724 return TRUE;
1725 }
1726 }
1727 }
1728
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001729 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001730 return FALSE;
1731}
1732
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001733int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1734 int *thread_finished
1735 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1736{
1737 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1738 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1739}
1740
1741int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1742 int *thread_finished
1743 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1744{
1745 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1746 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1747}
1748
1749int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1750 int *thread_finished
1751 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1752{
1753 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1754 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1755}
1756
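//-----------------------------------------------------------------------------
// Illustrative sketch (not compiled into the library): how a waiter typically
// drives the template above through one of the flag types.  The call shape
// mirrors the kmp_flag_32 usage in __kmp_tasking_barrier() later in this file;
// the local variable names (this_thr, gtid, task_team) are placeholders for
// the example only.
#if 0
    volatile kmp_uint32 *spin = & task_team -> tt.tt_unfinished_threads;
    int thread_finished = FALSE;
    kmp_flag_32 flag( spin, 0U );       // spin location and the value to check for
    // Execute (and steal) tasks; with final_spin == TRUE the template also
    // decrements tt_unfinished_threads once this thread runs out of work, and
    // returns TRUE as soon as flag.done_check() sees the count reach zero.
    while ( ! flag.execute_tasks( this_thr, gtid, TRUE /* final_spin */,
                                  & thread_finished
                                  USE_ITT_BUILD_ARG(NULL), 0 /* is_constrained */ ) ) {
        KMP_YIELD( TRUE );              // no work and not done yet; yield and retry
    }
#endif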
1757
Jim Cownie5e8470a2013-09-27 10:38:44 +00001758
1759//-----------------------------------------------------------------------------
1760// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1761// next barrier so they can assist in executing enqueued tasks.
1762// First thread in allocates the task team atomically.
1763
1764static void
1765__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1766{
1767 kmp_team_t *team = this_thr->th.th_team;
1768 kmp_thread_data_t *threads_data;
1769 int nthreads, i, is_init_thread;
1770
1771 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1772 __kmp_gtid_from_thread( this_thr ) ) );
1773
1774 KMP_DEBUG_ASSERT(task_team != NULL);
1775 KMP_DEBUG_ASSERT(team != NULL);
1776
1777 nthreads = task_team->tt.tt_nproc;
1778 KMP_DEBUG_ASSERT(nthreads > 0);
1779 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1780
1781 // Allocate or increase the size of threads_data if necessary
1782 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1783
1784 if (!is_init_thread) {
1785 // Some other thread already set up the array.
1786 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1787 __kmp_gtid_from_thread( this_thr ) ) );
1788 return;
1789 }
1790 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1791 KMP_DEBUG_ASSERT( threads_data != NULL );
1792
1793 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1794 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1795 {
1796 // Release any threads sleeping at the barrier, so that they can steal
1797 // tasks and execute them. In extra barrier mode, tasks do not sleep
1798 // at the separate tasking barrier, so this isn't a problem.
1799 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001800 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001801 kmp_info_t *thread = threads_data[i].td.td_thr;
1802
1803 if (i == this_thr->th.th_info.ds.ds_tid) {
1804 continue;
1805 }
1806 // Since we haven't locked the thread's suspend mutex lock at this
1807 // point, there is a small window where a thread might be putting
1808 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001809 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810            // to see if other threads are sleeping (using the same random
1811 // mechanism that is used for task stealing) and awakens them if
1812 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001813 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001814 {
1815 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1816 __kmp_gtid_from_thread( this_thr ),
1817 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001818 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001819 }
1820 else {
1821 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1822 __kmp_gtid_from_thread( this_thr ),
1823 __kmp_gtid_from_thread( thread ) ) );
1824 }
1825 }
1826 }
1827
1828 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1829 __kmp_gtid_from_thread( this_thr ) ) );
1830}
1831
1832
1833/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001834/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00001835 * Utility routines for "task teams".  A task team (kmp_task_team_t) is kind of
 1836 * like a shadow of the kmp_team_t data struct, with a different lifetime.
 1837 * After a child thread checks into a barrier and calls __kmp_release() from
1838 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1839 * longer assume that the kmp_team_t structure is intact (at any moment, the
1840 * master thread may exit the barrier code and free the team data structure,
1841 * and return the threads to the thread pool).
1842 *
 1843 * This does not work with the tasking code, as the thread is still
 1844 * expected to participate in the execution of any tasks that may have been
 1845 * spawned by a member of the team, and the thread still needs access to
 1846 * each of the other threads in the team, so that it can steal work from them.
1847 *
1848 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 1849 * counting mechanism, and is allocated by the master thread before calling
 1850 * __kmp_<barrier_kind>_release, and then is released by the last thread to
1851 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1852 * of the kmp_task_team_t structs for consecutive barriers can overlap
1853 * (and will, unless the master thread is the last thread to exit the barrier
1854 * release phase, which is not typical).
1855 *
1856 * The existence of such a struct is useful outside the context of tasking,
1857 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1858 * so that any performance differences show up when comparing the 2.5 vs. 3.0
1859 * libraries.
1860 *
1861 * We currently use the existence of the threads array as an indicator that
1862 * tasks were spawned since the last barrier. If the structure is to be
1863 * useful outside the context of tasking, then this will have to change, but
 1864 * not setting the field minimizes the performance impact of tasking on
1865 * barriers, when no explicit tasks were spawned (pushed, actually).
1866 */
1867
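/*
 * Illustrative sketch (not compiled into the library): the reference-counting
 * lifetime described above, expressed in terms of the routines defined below.
 * The variable names are placeholders; the master is not included in the
 * count, so tt_ref_ct starts at nthreads - 1.
 */
#if 0
    /* master thread, while setting up the barrier release phase: */
    kmp_task_team_t *task_team = __kmp_allocate_task_team( master_thr, team );
    team->t.t_task_team = task_team;           /* tt_ref_ct == nthreads - 1 */

    /* each worker thread, once it has finished stealing/executing tasks
       after the release phase:                                            */
    __kmp_unref_task_team( task_team, worker_thr );
    /* ...the thread whose decrement drives tt_ref_ct to zero calls
       __kmp_free_task_team(), returning the struct to the free list.      */
#endif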
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001868
Jim Cownie5e8470a2013-09-27 10:38:44 +00001869static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1870// Lock for task team data structures
1871static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1872
1873
1874//------------------------------------------------------------------------------
1875// __kmp_alloc_task_deque:
 1876// Allocates a task deque for a particular thread, and initializes the necessary
1877// data structures relating to the deque. This only happens once per thread
1878// per task team since task teams are recycled.
1879// No lock is needed during allocation since each thread allocates its own
1880// deque.
1881
1882static void
1883__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1884{
1885 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1886 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1887
1888 // Initialize last stolen task field to "none"
1889 thread_data -> td.td_deque_last_stolen = -1;
1890
1891 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1892 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1893 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1894
1895 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1896 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1897 // Allocate space for task deque, and zero the deque
1898 // Cannot use __kmp_thread_calloc() because threads not around for
1899 // kmp_reap_task_team( ).
1900 thread_data -> td.td_deque = (kmp_taskdata_t **)
1901 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1902}
1903
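//------------------------------------------------------------------------------
// Illustrative sketch (not compiled into the library): because TASK_DEQUE_SIZE
// is a power of two, the deque code in this file wraps the circular-buffer
// indices with "& TASK_DEQUE_MASK" instead of a modulo.  Under the deque lock,
// td_deque_ntasks entries live between td_deque_head and td_deque_tail:
#if 0
    /* owner pushes at the tail: */
    deque[ td_deque_tail ] = taskdata;
    td_deque_tail = ( td_deque_tail + 1 ) & TASK_DEQUE_MASK;

    /* owner pops from the tail (LIFO, see __kmp_remove_my_task): */
    tail = ( td_deque_tail - 1 ) & TASK_DEQUE_MASK;
    taskdata = deque[ tail ];
    td_deque_tail = tail;

    /* thief steals from the head (FIFO, see __kmp_steal_task): */
    taskdata = deque[ td_deque_head ];
    td_deque_head = ( td_deque_head + 1 ) & TASK_DEQUE_MASK;
#endif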
1904
1905//------------------------------------------------------------------------------
1906// __kmp_free_task_deque:
1907// Deallocates a task deque for a particular thread.
1908// Happens at library deallocation so don't need to reset all thread data fields.
1909
1910static void
1911__kmp_free_task_deque( kmp_thread_data_t *thread_data )
1912{
1913 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1914
1915 if ( thread_data -> td.td_deque != NULL ) {
1916 TCW_4(thread_data -> td.td_deque_ntasks, 0);
1917 __kmp_free( thread_data -> td.td_deque );
1918 thread_data -> td.td_deque = NULL;
1919 }
1920 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1921
1922#ifdef BUILD_TIED_TASK_STACK
1923 // GEH: Figure out what to do here for td_susp_tied_tasks
1924 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1925 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1926 }
1927#endif // BUILD_TIED_TASK_STACK
1928}
1929
1930
1931//------------------------------------------------------------------------------
1932// __kmp_realloc_task_threads_data:
1933// Allocates a threads_data array for a task team, either by allocating an initial
1934// array or enlarging an existing array. Only the first thread to get the lock
 1935// allocs or enlarges the array and re-initializes the array elements.
1936// That thread returns "TRUE", the rest return "FALSE".
1937// Assumes that the new array size is given by task_team -> tt.tt_nproc.
1938// The current size is given by task_team -> tt.tt_max_threads.
1939
1940static int
1941__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1942{
1943 kmp_thread_data_t ** threads_data_p;
1944 kmp_int32 nthreads, maxthreads;
1945 int is_init_thread = FALSE;
1946
1947 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1948 // Already reallocated and initialized.
1949 return FALSE;
1950 }
1951
1952 threads_data_p = & task_team -> tt.tt_threads_data;
1953 nthreads = task_team -> tt.tt_nproc;
1954 maxthreads = task_team -> tt.tt_max_threads;
1955
1956 // All threads must lock when they encounter the first task of the implicit task
1957 // region to make sure threads_data fields are (re)initialized before used.
1958 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1959
1960 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1961 // first thread to enable tasking
1962 kmp_team_t *team = thread -> th.th_team;
1963 int i;
1964
1965 is_init_thread = TRUE;
1966 if ( maxthreads < nthreads ) {
1967
1968 if ( *threads_data_p != NULL ) {
1969 kmp_thread_data_t *old_data = *threads_data_p;
1970 kmp_thread_data_t *new_data = NULL;
1971
1972 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1973 "threads data for task_team %p, new_size = %d, old_size = %d\n",
1974 __kmp_gtid_from_thread( thread ), task_team,
1975 nthreads, maxthreads ) );
1976 // Reallocate threads_data to have more elements than current array
1977 // Cannot use __kmp_thread_realloc() because threads not around for
1978 // kmp_reap_task_team( ). Note all new array entries are initialized
1979 // to zero by __kmp_allocate().
1980 new_data = (kmp_thread_data_t *)
1981 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1982 // copy old data to new data
1983 memcpy( (void *) new_data, (void *) old_data,
 1984                         maxthreads * sizeof(kmp_thread_data_t) );  // elements are kmp_thread_data_t
1985
1986#ifdef BUILD_TIED_TASK_STACK
1987 // GEH: Figure out if this is the right thing to do
1988 for (i = maxthreads; i < nthreads; i++) {
1989 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1990 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1991 }
1992#endif // BUILD_TIED_TASK_STACK
1993 // Install the new data and free the old data
1994 (*threads_data_p) = new_data;
1995 __kmp_free( old_data );
1996 }
1997 else {
1998 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
1999 "threads data for task_team %p, size = %d\n",
2000 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2001 // Make the initial allocate for threads_data array, and zero entries
2002 // Cannot use __kmp_thread_calloc() because threads not around for
2003 // kmp_reap_task_team( ).
2004 *threads_data_p = (kmp_thread_data_t *)
2005 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2006#ifdef BUILD_TIED_TASK_STACK
2007 // GEH: Figure out if this is the right thing to do
2008 for (i = 0; i < nthreads; i++) {
2009 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2010 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2011 }
2012#endif // BUILD_TIED_TASK_STACK
2013 }
2014 task_team -> tt.tt_max_threads = nthreads;
2015 }
2016 else {
2017 // If array has (more than) enough elements, go ahead and use it
2018 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2019 }
2020
2021 // initialize threads_data pointers back to thread_info structures
2022 for (i = 0; i < nthreads; i++) {
2023 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2024 thread_data -> td.td_thr = team -> t.t_threads[i];
2025
2026 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2027 // The last stolen field survives across teams / barrier, and the number
2028 // of threads may have changed. It's possible (likely?) that a new
2029 // parallel region will exhibit the same behavior as the previous region.
2030 thread_data -> td.td_deque_last_stolen = -1;
2031 }
2032 }
2033
2034 KMP_MB();
2035 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2036 }
2037
2038 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2039 return is_init_thread;
2040}
2041
2042
2043//------------------------------------------------------------------------------
2044// __kmp_free_task_threads_data:
2045// Deallocates a threads_data array for a task team, including any attached
2046// tasking deques. Only occurs at library shutdown.
2047
2048static void
2049__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2050{
2051 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2052 if ( task_team -> tt.tt_threads_data != NULL ) {
2053 int i;
2054 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2055 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2056 }
2057 __kmp_free( task_team -> tt.tt_threads_data );
2058 task_team -> tt.tt_threads_data = NULL;
2059 }
2060 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2061}
2062
2063
2064//------------------------------------------------------------------------------
2065// __kmp_allocate_task_team:
2066// Allocates a task team associated with a specific team, taking it from
2067// the global task team free list if possible. Also initializes data structures.
2068
2069static kmp_task_team_t *
2070__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2071{
2072 kmp_task_team_t *task_team = NULL;
2073 int nthreads;
2074
2075 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2076 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2077
2078 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2079 // Take a task team from the task team pool
2080 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2081 if (__kmp_free_task_teams != NULL) {
2082 task_team = __kmp_free_task_teams;
2083 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2084 task_team -> tt.tt_next = NULL;
2085 }
2086 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2087 }
2088
2089 if (task_team == NULL) {
2090 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2091 "task team for team %p\n",
2092 __kmp_gtid_from_thread( thread ), team ) );
2093 // Allocate a new task team if one is not available.
2094 // Cannot use __kmp_thread_malloc() because threads not around for
2095 // kmp_reap_task_team( ).
2096 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2097 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2098 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2099 //task_team -> tt.tt_max_threads = 0;
2100 //task_team -> tt.tt_next = NULL;
2101 }
2102
2103 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2104 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2105
2106 task_team -> tt.tt_state = 0;
2107 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2108 TCW_4( task_team -> tt.tt_active, TRUE );
2109 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2110
2111 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2112 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2113 return task_team;
2114}
2115
2116
2117//------------------------------------------------------------------------------
2118// __kmp_free_task_team:
2119// Frees the task team associated with a specific thread, and adds it
2120// to the global task team free list.
2121//
2122
2123static void
2124__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2125{
2126 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2127 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2128
2129 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2130
2131 // Put task team back on free list
2132 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2133
2134 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2135 task_team -> tt.tt_next = __kmp_free_task_teams;
2136 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2137 TCW_PTR(__kmp_free_task_teams, task_team);
2138
2139 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2140}
2141
2142
2143//------------------------------------------------------------------------------
2144// __kmp_reap_task_teams:
2145// Free all the task teams on the task team free list.
2146// Should only be done during library shutdown.
2147// Cannot do anything that needs a thread structure or gtid since they are already gone.
2148
2149void
2150__kmp_reap_task_teams( void )
2151{
2152 kmp_task_team_t *task_team;
2153
2154 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2155 // Free all task_teams on the free list
2156 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2157 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2158 __kmp_free_task_teams = task_team -> tt.tt_next;
2159 task_team -> tt.tt_next = NULL;
2160
2161 // Free threads_data if necessary
2162 if ( task_team -> tt.tt_threads_data != NULL ) {
2163 __kmp_free_task_threads_data( task_team );
2164 }
2165 __kmp_free( task_team );
2166 }
2167 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2168 }
2169}
2170
2171
2172//------------------------------------------------------------------------------
 2173// __kmp_unref_task_team:
 2174// Removes one thread's reference to the task team structure by
 2175// decreasing the reference count, and deallocates the task team if there
 2176// are no more references to it.
2177//
2178void
2179__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2180{
2181 kmp_uint ref_ct;
2182
2183 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2184
2185 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2186 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2187
2188
2189 if ( ref_ct == 0 ) {
2190 __kmp_free_task_team( thread, task_team );
2191 }
2192
2193 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2194}
2195
2196
2197//------------------------------------------------------------------------------
2198// __kmp_wait_to_unref_task_teams:
2199// Some threads could still be in the fork barrier release code, possibly
2200// trying to steal tasks. Wait for each thread to unreference its task team.
2201//
2202void
2203__kmp_wait_to_unref_task_teams(void)
2204{
2205 kmp_info_t *thread;
2206 kmp_uint32 spins;
2207 int done;
2208
2209 KMP_INIT_YIELD( spins );
2210
2211
2212 for (;;) {
2213 done = TRUE;
2214
 2215        // TODO: GEH - this may be wrong because some sync would be necessary
2216 // in case threads are added to the pool during the traversal.
2217 // Need to verify that lock for thread pool is held when calling
2218 // this routine.
2219 for (thread = (kmp_info_t *)__kmp_thread_pool;
2220 thread != NULL;
2221 thread = thread->th.th_next_pool)
2222 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002223#if KMP_OS_WINDOWS
2224 DWORD exit_val;
2225#endif
2226 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2227 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2228 __kmp_gtid_from_thread( thread ) ) );
2229 continue;
2230 }
2231#if KMP_OS_WINDOWS
2232 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2233 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2234 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2235 __kmp_unref_task_team( thread->th.th_task_team, thread );
2236 }
2237 continue;
2238 }
2239#endif
2240
2241 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2242
2243 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2244 __kmp_gtid_from_thread( thread ) ) );
2245
2246 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002247 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002248 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002249 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002250 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2251 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002252 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002253 }
2254 }
2255 }
2256 if (done) {
2257 break;
2258 }
2259
2260 // If we are oversubscribed,
2261 // or have waited a bit (and library mode is throughput), yield.
2262 // Pause is in the following code.
2263 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2264 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2265 }
2266
2267
2268}
2269
2270
2271//------------------------------------------------------------------------------
2272// __kmp_task_team_setup: Create a task_team for the current team, but use
2273// an already created, unused one if it already exists.
2274// This may be called by any thread, but only for teams with # threads >1.
2275
2276void
2277__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
2278{
2279 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2280
2281 if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
2282 // Allocate a new task team, which will be propagated to
2283 // all of the worker threads after the barrier. As they
 2284        // spin in the barrier release phase, they will continue
2285 // to use the previous task team struct, until they receive
2286 // the signal to stop checking for tasks (they can't safely
2287 // reference the kmp_team_t struct, which could be reallocated
2288 // by the master thread).
2289 team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
2290 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
2291 "task_team %p for team %d\n",
2292 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
2293 ((team != NULL) ? team->t.t_id : -1)) );
2294 }
2295 else {
2296 // All threads have reported in, and no tasks were spawned
2297 // for this release->gather region. Leave the old task
2298 // team struct in place for the upcoming region. No task
2299 // teams are formed for serialized teams.
2300 }
2301 if ( team->t.t_task_team != NULL ) {
2302 // Toggle the state flag so that we can tell which side of
2303 // the barrier we are on.
2304 team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
2305 }
2306}
2307
2308
2309//------------------------------------------------------------------------------
2310// __kmp_task_team_sync: Propagation of task team data from team to threads
2311// which happens just after the release phase of a team barrier. This may be
2312// called by any thread, but only for teams with # threads > 1.
2313
2314void
2315__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2316{
2317 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2318
2319 // On the rare chance that this thread never saw that the task
2320 // team was no longer active, then unref/deallocate it now.
2321 if ( this_thr->th.th_task_team != NULL ) {
2322 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2323 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2324 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2325 } else {
2326 //
2327 // We are re-using a task team that was never enabled.
2328 //
2329 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
2330 }
2331 }
2332
2333 //
2334 // It is now safe to propagate the task team pointer from the
2335 // team struct to the current thread.
2336 //
2337 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
2338 if ( this_thr->th.th_task_team != NULL ) {
2339 //
2340 // Toggle the th_task_state field, instead of reading it from
2341 // the task team. Reading the tt_state field at this point
2342 // causes a 30% regression on EPCC parallel - toggling it
2343 // is much cheaper.
2344 //
2345 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2346 KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
2347 }
2348 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2349 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2350 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2351}
2352
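/*
 * Illustrative sketch (not compiled into the library): the parity handshake
 * between __kmp_task_team_setup() and __kmp_task_team_sync() above.  Both
 * sides flip a one-bit state, so the cheap per-thread th_task_state copy can
 * stand in for a read of the shared tt_state field:
 */
#if 0
    /* master, in __kmp_task_team_setup() on the gather side of the barrier: */
    task_team->tt.tt_state = 1 - this_thr->th.th_task_state;

    /* every thread, in __kmp_task_team_sync() after the release phase:      */
    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
    KMP_DEBUG_ASSERT( this_thr->th.th_task_state ==
                      TCR_4( team->t.t_task_team->tt.tt_state ) );
#endif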
2353
2354//------------------------------------------------------------------------------
2355// __kmp_task_team_wait: Master thread waits for outstanding tasks after
2356// the barrier gather phase. Only called by master thread if #threads
2357// in team > 1 !
2358
2359void
Jim Cownie181b4bb2013-12-23 17:28:57 +00002360__kmp_task_team_wait( kmp_info_t *this_thr,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002361 kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002362 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363 )
2364{
2365 kmp_task_team_t *task_team = team->t.t_task_team;
2366
2367 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2368 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2369
2370 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
2371 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2372 __kmp_gtid_from_thread( this_thr ), task_team ) );
2373 //
2374 // All worker threads might have dropped through to the
2375 // release phase, but could still be executing tasks.
2376 // Wait here for all tasks to complete. To avoid memory
2377 // contention, only the master thread checks for the
2378 // termination condition.
2379 //
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002380 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2381 flag.wait(this_thr, TRUE
2382 USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002383
2384 //
2385 // Kill the old task team, so that the worker threads will
2386 // stop referencing it while spinning. They will
2387 // deallocate it when the reference count reaches zero.
2388 // The master thread is not included in the ref count.
2389 //
2390 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2391 __kmp_gtid_from_thread( this_thr ), task_team ) );
2392 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2393 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2394 KMP_MB();
2395
2396 TCW_PTR(this_thr->th.th_task_team, NULL);
2397 team->t.t_task_team = NULL;
2398 }
2399}
2400
2401
2402//------------------------------------------------------------------------------
2403// __kmp_tasking_barrier:
2404// Internal function to execute all tasks prior to a regular barrier or a
2405// join barrier. It is a full barrier itself, which unfortunately turns
2406// regular barriers into double barriers and join barriers into 1 1/2
2407// barriers.
2408// This routine may only called when __kmp_tasking_mode == tskm_extra_barrier.
2409
2410void
2411__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2412{
2413 volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
2414 int flag = FALSE;
2415 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2416
2417#if USE_ITT_BUILD
2418 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2419#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002420 kmp_flag_32 spin_flag(spin, 0U);
2421 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2422 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423#if USE_ITT_BUILD
2424 // TODO: What about itt_sync_obj??
2425 KMP_FSYNC_SPIN_PREPARE( spin );
2426#endif /* USE_ITT_BUILD */
2427
2428 if( TCR_4(__kmp_global.g.g_done) ) {
2429 if( __kmp_global.g.g_abort )
2430 __kmp_abort_thread( );
2431 break;
2432 }
2433 KMP_YIELD( TRUE ); // GH: We always yield here
2434 }
2435#if USE_ITT_BUILD
2436 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2437#endif /* USE_ITT_BUILD */
2438}
2439