1/*
2 * kmp_tasking.c -- OpenMP 3.0 tasking support.
3 * $Revision: 42852 $
4 * $Date: 2013-12-04 10:50:49 -0600 (Wed, 04 Dec 2013) $
5 */
6
7
8//===----------------------------------------------------------------------===//
9//
10// The LLVM Compiler Infrastructure
11//
12// This file is dual licensed under the MIT and the University of Illinois Open
13// Source Licenses. See LICENSE.txt for details.
14//
15//===----------------------------------------------------------------------===//
16
17
18#include "kmp.h"
19#include "kmp_i18n.h"
20#include "kmp_itt.h"
21
22
23#if OMP_30_ENABLED
24
25/* ------------------------------------------------------------------------ */
26/* ------------------------------------------------------------------------ */
27
28
29/* forward declaration */
30static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
31static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
32static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
33
34#ifndef KMP_DEBUG
35# define __kmp_static_delay( arg ) /* nothing to do */
36#else
37
38static void
39__kmp_static_delay( int arg )
40{
41/* Work around weird code-gen bug that causes assert to trip */
42# if KMP_ARCH_X86_64 && KMP_OS_LINUX
43 KMP_ASSERT( arg != 0 );
44# else
45 KMP_ASSERT( arg >= 0 );
46# endif
47}
48#endif /* KMP_DEBUG */
49
50static void
51__kmp_static_yield( int arg )
52{
53 __kmp_yield( arg );
54}
55
56#ifdef BUILD_TIED_TASK_STACK
57
58//---------------------------------------------------------------------------
59// __kmp_trace_task_stack: print the tied tasks from the task stack in order
60// from top to bottom
61//
62// gtid: global thread identifier for thread containing stack
63// thread_data: thread data for task team thread containing stack
64// threshold: value above which the trace statement triggers
65// location: string identifying call site of this function (for trace)
66
67static void
68__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
69{
70 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
71 kmp_taskdata_t **stack_top = task_stack -> ts_top;
72 kmp_int32 entries = task_stack -> ts_entries;
73 kmp_taskdata_t *tied_task;
74
75 KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
76 "first_block = %p, stack_top = %p \n",
77 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
78
79 KMP_DEBUG_ASSERT( stack_top != NULL );
80 KMP_DEBUG_ASSERT( entries > 0 );
81
82 while ( entries != 0 )
83 {
84 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
85 // fix up ts_top if we need to pop from previous block
86 if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
87 {
88 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
89
90 stack_block = stack_block -> sb_prev;
91 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
92 }
93
94 // finish bookkeeping
95 stack_top--;
96 entries--;
97
98 tied_task = * stack_top;
99
100 KMP_DEBUG_ASSERT( tied_task != NULL );
101 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
102
103 KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
104 "stack_top=%p, tied_task=%p\n",
105 location, gtid, entries, stack_top, tied_task ) );
106 }
107 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
108
109 KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
110 location, gtid ) );
111}
112
113//---------------------------------------------------------------------------
114// __kmp_init_task_stack: initialize the task stack for the first time
115// after a thread_data structure is created.
116// It should not be necessary to do this again (assuming the stack works).
117//
118// gtid: global thread identifier of calling thread
119// thread_data: thread data for task team thread containing stack
120
121static void
122__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
123{
124 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
125 kmp_stack_block_t *first_block;
126
127 // set up the first block of the stack
128 first_block = & task_stack -> ts_first_block;
129 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
130 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
131
132 // initialize the stack to be empty
133 task_stack -> ts_entries = TASK_STACK_EMPTY;
134 first_block -> sb_next = NULL;
135 first_block -> sb_prev = NULL;
136}
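//----------------------------------------------------------------------------
// Illustration only (not compiled): the tied-task stack above is a chain of
// fixed-size blocks, and the push/pop code detects a block boundary with a
// power-of-two mask (TASK_STACK_INDEX_MASK is presumably TASK_STACK_BLOCK_SIZE - 1).
// The sketch below shows that boundary test in isolation; BLOCK_SIZE and
// INDEX_MASK are illustrative stand-ins, not the runtime's real constants.
#if 0
#include <assert.h>
#include <stdio.h>

#define BLOCK_SIZE 64                       /* stands in for TASK_STACK_BLOCK_SIZE */
#define INDEX_MASK ( BLOCK_SIZE - 1 )       /* stands in for TASK_STACK_INDEX_MASK */

int main( void )
{
    int entries;
    /* A block boundary is crossed whenever the low bits of the entry count are
       zero, i.e. ( entries & INDEX_MASK ) == 0.  Note the parentheses: in C,
       '==' binds tighter than '&'. */
    for ( entries = 1; entries <= 3 * BLOCK_SIZE; entries++ ) {
        if ( ( entries & INDEX_MASK ) == 0 ) {
            printf( "entries=%d: crossed a block boundary\n", entries );
        }
    }
    assert( ( BLOCK_SIZE       & INDEX_MASK ) == 0 );
    assert( ( (BLOCK_SIZE + 1) & INDEX_MASK ) != 0 );
    return 0;
}
#endif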
137
138
139//---------------------------------------------------------------------------
140// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
141//
142// gtid: global thread identifier for calling thread
143// thread_data: thread info for thread containing stack
144
145static void
146__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
147{
148 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
149 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
150
151 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
152 // free from the second block of the stack
153 while ( stack_block != NULL ) {
154 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
155
156 stack_block -> sb_next = NULL;
157 stack_block -> sb_prev = NULL;
158 if (stack_block != & task_stack -> ts_first_block) {
159 __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
160 }
161 stack_block = next_block;
162 }
163 // initialize the stack to be empty
164 task_stack -> ts_entries = 0;
165 task_stack -> ts_top = NULL;
166}
167
168
169//---------------------------------------------------------------------------
170// __kmp_push_task_stack: Push the tied task onto the task stack.
171// Grow the stack if necessary by allocating another block.
172//
173// gtid: global thread identifier for calling thread
174// thread: thread info for thread containing stack
175// tied_task: the task to push on the stack
176
177static void
178__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
179{
180 // GEH - need to consider what to do if tt_threads_data not allocated yet
181 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
182 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
183 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
184
185 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
186 return; // Don't push anything on stack if team or team tasks are serialized
187 }
188
189 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
190 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
191
192 KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
193 gtid, thread, tied_task ) );
194 // Store entry
195 * (task_stack -> ts_top) = tied_task;
196
197 // Do bookkeeping for next push
198 task_stack -> ts_top++;
199 task_stack -> ts_entries++;
200
201 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
202 {
203 // Find beginning of this task block
204 kmp_stack_block_t *stack_block =
205 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
206
207 // Check if we already have a block
208 if ( stack_block -> sb_next != NULL )
209 { // reset ts_top to beginning of next block
210 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
211 }
212 else
213 { // Alloc new block and link it up
214 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
215 __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
216
217 task_stack -> ts_top = & new_block -> sb_block[0];
218 stack_block -> sb_next = new_block;
219 new_block -> sb_prev = stack_block;
220 new_block -> sb_next = NULL;
221
222 KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
223 gtid, tied_task, new_block ) );
224 }
225 }
226 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
227}
228
229//---------------------------------------------------------------------------
230// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
231// the task, just check to make sure it matches the ending task passed in.
232//
233// gtid: global thread identifier for the calling thread
234// thread: thread info structure containing stack
235// tied_task: (local) the task actually popped off the stack
236// ending_task: the task that is ending (should match the popped task)
237
238static void
239__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
240{
241 // GEH - need to consider what to do if tt_threads_data not allocated yet
242 kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
243 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
244 kmp_taskdata_t *tied_task;
245
246 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
247 return; // Don't pop anything from stack if team or team tasks are serialized
248 }
249
250 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
251 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
252
253 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
254
255 // fix up ts_top if we need to pop from previous block
256 if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
257 {
258 kmp_stack_block_t *stack_block =
259 (kmp_stack_block_t *) (task_stack -> ts_top) ;
260
261 stack_block = stack_block -> sb_prev;
262 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
263 }
264
265 // finish bookkeeping
266 task_stack -> ts_top--;
267 task_stack -> ts_entries--;
268
269 tied_task = * (task_stack -> ts_top );
270
271 KMP_DEBUG_ASSERT( tied_task != NULL );
272 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
273 KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
274
275 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
276 return;
277}
278#endif /* BUILD_TIED_TASK_STACK */
279
280//---------------------------------------------------
281// __kmp_push_task: Add a task to the thread's deque
282
283static kmp_int32
284__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
285{
286 kmp_info_t * thread = __kmp_threads[ gtid ];
287 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
288 kmp_task_team_t * task_team = thread->th.th_task_team;
289 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
290 kmp_thread_data_t * thread_data;
291
292 KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
293
294 // The first check avoids building task_team thread data if serialized
295 if ( taskdata->td_flags.task_serial ) {
296 KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
297 gtid, taskdata ) );
298 return TASK_NOT_PUSHED;
299 }
300
301 // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
302 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
303 if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
304 __kmp_enable_tasking( task_team, thread );
305 }
306 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
307 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
308
309 // Find tasking deque specific to encountering thread
310 thread_data = & task_team -> tt.tt_threads_data[ tid ];
311
312 // No lock needed since only owner can allocate
313 if (thread_data -> td.td_deque == NULL ) {
314 __kmp_alloc_task_deque( thread, thread_data );
315 }
316
317 // Check if deque is full
318 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
319 {
320 KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
321 gtid, taskdata ) );
322 return TASK_NOT_PUSHED;
323 }
324
325 // Lock the deque for the task push operation
326 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
327
328 // Must have room since no thread other than the calling thread can add tasks
329 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
330
331 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
332 // Wrap index.
333 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
334 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
335
336 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
337
338 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
339 "task=%p ntasks=%d head=%u tail=%u\n",
340 gtid, taskdata, thread_data->td.td_deque_ntasks,
341 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
342
343 return TASK_SUCCESSFULLY_PUSHED;
344}
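//----------------------------------------------------------------------------
// Illustration only (not compiled): the per-thread deque above is a fixed-size,
// power-of-two ring buffer (TASK_DEQUE_MASK is presumably TASK_DEQUE_SIZE - 1).
// The owner pushes and pops at the tail; thieves remove from the head.  A
// minimal standalone sketch of the wrap-around push, with illustrative names
// and without the bootstrap lock and TCR_4/TCW_4 accessors the runtime uses:
#if 0
#define TOY_DEQUE_SIZE ( 1 << 8 )                 /* must be a power of two        */
#define TOY_DEQUE_MASK ( TOY_DEQUE_SIZE - 1 )     /* stands in for TASK_DEQUE_MASK */

typedef struct {
    void     *slots[ TOY_DEQUE_SIZE ];
    unsigned  head;                               /* thieves steal from here       */
    unsigned  tail;                               /* owner pushes/pops here        */
    int       ntasks;
} toy_deque_t;

static int toy_push( toy_deque_t *d, void *task )
{
    if ( d->ntasks >= TOY_DEQUE_SIZE )
        return 0;                                 /* full: caller runs the task immediately */
    d->slots[ d->tail ] = task;
    d->tail = ( d->tail + 1 ) & TOY_DEQUE_MASK;   /* wrap the index */
    d->ntasks++;
    return 1;
}
#endif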
345
346
347//-----------------------------------------------------------------------------------------
348// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
349// this_thr: thread structure to set current_task in.
350
351void
352__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
353{
354 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
355 "curtask_parent=%p\n",
356 0, this_thr, this_thr -> th.th_current_task,
357 this_thr -> th.th_current_task -> td_parent ) );
358
359 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
360
361 KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
362 "curtask_parent=%p\n",
363 0, this_thr, this_thr -> th.th_current_task,
364 this_thr -> th.th_current_task -> td_parent ) );
365}
366
367
368//---------------------------------------------------------------------------------------
369// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
370// this_thr: thread structure to set up
371// team: team for implicit task data
372// tid: thread within team to set up
373
374void
375__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
376{
377 // The thread's current task is the parent of the newly created implicit tasks of the new team
378 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
379 "parent_task=%p\n",
380 tid, this_thr, this_thr->th.th_current_task,
381 team->t.t_implicit_task_taskdata[tid].td_parent ) );
382
383 KMP_DEBUG_ASSERT (this_thr != NULL);
384
385 if( tid == 0 ) {
386 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
387 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
388 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
389 }
390 } else {
391 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
392 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
393 }
394
395 KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
396 "parent_task=%p\n",
397 tid, this_thr, this_thr->th.th_current_task,
398 team->t.t_implicit_task_taskdata[tid].td_parent ) );
399}
400
401
402//----------------------------------------------------------------------
403// __kmp_task_start: bookkeeping for a task starting execution
404// GTID: global thread id of calling thread
405// task: task starting execution
406// current_task: task suspending
407
408static void
409__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
410{
411 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
412 kmp_info_t * thread = __kmp_threads[ gtid ];
413
414 KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
415 gtid, taskdata, current_task) );
416
417 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
418
419 // mark currently executing task as suspended
420 // TODO: GEH - make sure root team implicit task is initialized properly.
421 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
422 current_task -> td_flags.executing = 0;
423
424 // Add task to stack if tied
425#ifdef BUILD_TIED_TASK_STACK
426 if ( taskdata -> td_flags.tiedness == TASK_TIED )
427 {
428 __kmp_push_task_stack( gtid, thread, taskdata );
429 }
430#endif /* BUILD_TIED_TASK_STACK */
431
432 // mark starting task as executing and as current task
433 thread -> th.th_current_task = taskdata;
434
435 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
436 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
437 taskdata -> td_flags.started = 1;
438 taskdata -> td_flags.executing = 1;
439 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
440 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
441
442 // GEH TODO: shouldn't we pass some sort of location identifier here?
443 // APT: yes, we will pass location here.
444 // need to store current thread state (in a thread or taskdata structure)
445 // before setting work_state, otherwise wrong state is set after end of task
446
447 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
448 gtid, taskdata ) );
449
450 return;
451}
452
453
454//----------------------------------------------------------------------
455// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
456// loc_ref: source location information; points to beginning of task block.
457// gtid: global thread number.
458// task: task thunk for the started task.
459
460void
461__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
462{
463 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
464 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
465
466 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
467 gtid, loc_ref, taskdata, current_task ) );
468
469 taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
470 __kmp_task_start( gtid, task, current_task );
471
472 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
473 gtid, loc_ref, taskdata ) );
474
475 return;
476}
477
478#ifdef TASK_UNUSED
479//----------------------------------------------------------------------
480// __kmpc_omp_task_begin: report that a given task has started execution
481// NEVER GENERATED BY COMPILER, DEPRECATED!!!
482
483void
484__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
485{
486 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
487
488 KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
489 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
490
491 __kmp_task_start( gtid, task, current_task );
492
493 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
494 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
495
496 return;
497}
498#endif // TASK_UNUSED
499
500
501//-------------------------------------------------------------------------------------
502// __kmp_free_task: free the current task space and the space for shareds
503// gtid: Global thread ID of calling thread
504// taskdata: task to free
505// thread: thread data structure of caller
506
507static void
508__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
509{
510 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
511 gtid, taskdata) );
512
513 // Check to make sure all flags and counters have the correct values
514 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
515 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
516 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
517 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
518 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
519 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
520
521 taskdata->td_flags.freed = 1;
522 // deallocate the taskdata and shared variable blocks associated with this task
523 #if USE_FAST_MEMORY
524 __kmp_fast_free( thread, taskdata );
525 #else /* ! USE_FAST_MEMORY */
526 __kmp_thread_free( thread, taskdata );
527 #endif
528
529 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
530 gtid, taskdata) );
531}
532
533//-------------------------------------------------------------------------------------
534// __kmp_free_task_and_ancestors: free the current task and ancestors without children
535//
536// gtid: Global thread ID of calling thread
537// taskdata: task to free
538// thread: thread data structure of caller
539
540static void
541__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
542{
543 kmp_int32 children = 0;
544 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
545
546 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
547
548 if ( !team_or_tasking_serialized ) {
549 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
550 KMP_DEBUG_ASSERT( children >= 0 );
551 }
552
553 // Now, go up the ancestor tree to see if any ancestors can now be freed.
554 while ( children == 0 )
555 {
556 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
557
558 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
559 "and freeing itself\n", gtid, taskdata) );
560
561 // --- Deallocate my ancestor task ---
562 __kmp_free_task( gtid, taskdata, thread );
563
564 taskdata = parent_taskdata;
565
566 // Stop checking ancestors at implicit task or if tasking serialized
567 // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
568 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
569 return;
570
571 if ( !team_or_tasking_serialized ) {
572 // Predecrement simulated by "- 1" calculation
573 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
574 KMP_DEBUG_ASSERT( children >= 0 );
575 }
576 }
577
578 KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
579 "not freeing it yet\n", gtid, taskdata, children) );
580}
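//----------------------------------------------------------------------------
// Illustration only (not compiled): td_allocated_child_tasks is a reference
// count that starts at 1 (the task itself) and gains 1 per allocated explicit
// child; storage is released only when it reaches zero, and each release drops
// one reference on the parent, which can cascade upward.  A standalone sketch
// with illustrative types (the runtime uses atomic KMP_TEST_THEN_DEC32 and also
// stops the walk when tasking is serialized):
#if 0
#include <stdlib.h>

typedef struct toy_task {
    struct toy_task *parent;
    int              alloc_refs;      /* 1 for the task itself + 1 per allocated child */
    int              is_implicit;
} toy_task_t;

static void toy_free_task_and_ancestors( toy_task_t *t )
{
    int refs = --t->alloc_refs;                   /* drop the task's own reference       */
    while ( refs == 0 ) {
        toy_task_t *parent = t->parent;
        free( t );                                /* nothing references it any more      */
        t = parent;
        if ( t == NULL || t->is_implicit )        /* implicit tasks are never freed here */
            return;
        refs = --t->alloc_refs;                   /* drop the freed child's reference    */
    }
}
#endif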
581
582//---------------------------------------------------------------------
583// __kmp_task_finish: bookkeeping to do when a task finishes execution
584// gtid: global thread ID for calling thread
585// task: task to be finished
586// resumed_task: task to be resumed. (may be NULL if task is serialized)
587
588static void
589__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
590{
591 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
592 kmp_info_t * thread = __kmp_threads[ gtid ];
593 kmp_int32 children = 0;
594
595 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
596 gtid, taskdata, resumed_task) );
597
598 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
599
600 // Pop task from stack if tied
601#ifdef BUILD_TIED_TASK_STACK
602 if ( taskdata -> td_flags.tiedness == TASK_TIED )
603 {
604 __kmp_pop_task_stack( gtid, thread, taskdata );
605 }
606#endif /* BUILD_TIED_TASK_STACK */
607
608 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
609 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
610 taskdata -> td_flags.executing = 0; // suspend the finishing task
611 taskdata -> td_flags.complete = 1; // mark the task as completed
612 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
613 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
614
615 // Only need to keep track of count if team parallel and tasking not serialized
616 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
617 // Predecrement simulated by "- 1" calculation
618 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
619 KMP_DEBUG_ASSERT( children >= 0 );
620#if OMP_40_ENABLED
621 if ( taskdata->td_taskgroup )
622 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
623 __kmp_release_deps(gtid,taskdata);
624#endif
625 }
626
627 KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
628 gtid, taskdata, children) );
629
630#if OMP_40_ENABLED
631 /* If the task's destructor thunk flag has been set, we need to invoke the
632 destructor thunk that has been generated by the compiler.
633 The code is placed here because, at this point, other tasks might have been released,
634 so the destructor invocations can overlap with other work in the
635 released tasks. The OpenMP spec is not specific on when the destructors are
636 invoked, so we should be free to choose.
637 */
638 if (taskdata->td_flags.destructors_thunk) {
639 kmp_routine_entry_t destr_thunk = task->destructors;
640 KMP_ASSERT(destr_thunk);
641 destr_thunk(gtid, task);
642 }
643#endif // OMP_40_ENABLED
644
645 // bookkeeping for resuming task:
646 // GEH - note tasking_ser => task_serial
647 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
648 taskdata->td_flags.task_serial);
649 if ( taskdata->td_flags.task_serial )
650 {
651 if (resumed_task == NULL) {
652 resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
653 }
654 else {
655 // verify resumed task passed in points to parent
656 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
657 }
658 }
659 else {
660 KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
661 }
662
663 // Free this task and then ancestor tasks if they have no children.
664 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
665
666 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
667
668 // TODO: GEH - make sure root team implicit task is initialized properly.
669 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
670 resumed_task->td_flags.executing = 1; // resume previous task
671
672 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
673 gtid, taskdata, resumed_task) );
674
675 return;
676}
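//----------------------------------------------------------------------------
// Illustration only (not compiled): the decrement of td_incomplete_child_tasks
// above is what __kmpc_omp_taskwait (and __kmpc_end_taskgroup, via the taskgroup
// count) eventually spins on.  A standalone sketch of that producer/consumer
// pairing with illustrative names; the real runtime executes or steals other
// tasks inside the wait loop instead of spinning idly:
#if 0
typedef struct {
    volatile int incomplete_children;     /* like td_incomplete_child_tasks */
} toy_parent_t;

static void toy_child_spawned( toy_parent_t *p )
{
    __sync_fetch_and_add( &p->incomplete_children, 1 );   /* __kmp_task_alloc side  */
}

static void toy_child_finished( toy_parent_t *p )
{
    __sync_fetch_and_sub( &p->incomplete_children, 1 );   /* __kmp_task_finish side */
}

static void toy_taskwait( toy_parent_t *p )
{
    while ( p->incomplete_children != 0 ) {
        /* the runtime calls __kmp_execute_tasks() here to make progress */
    }
}
#endif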
677
678//---------------------------------------------------------------------
679// __kmpc_omp_task_complete_if0: report that a task has completed execution
680// loc_ref: source location information; points to end of task block.
681// gtid: global thread number.
682// task: task thunk for the completed task.
683
684void
685__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
686{
687 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
688 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
689
690 __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
691
692 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
693 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
694
695 return;
696}
697
698#ifdef TASK_UNUSED
699//---------------------------------------------------------------------
700// __kmpc_omp_task_complete: report that a task has completed execution
701// NEVER GENERATED BY COMPILER, DEPRECATED!!!
702
703void
704__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
705{
706 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
707 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
708
709 __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
710
711 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
712 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
713 return;
714}
715#endif // TASK_UNUSED
716
717
718//----------------------------------------------------------------------------------------------------
719// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
720//
721// loc_ref: reference to source location of parallel region
722// this_thr: thread data structure corresponding to implicit task
723// team: team for this_thr
724// tid: thread id of given thread within team
725// set_curr_task: TRUE if need to push current task to thread
726// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
727// TODO: Get better loc_ref. Value passed in may be NULL
728
729void
730__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
731{
732 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
733
734 KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
735 tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
736
737 task->td_task_id = KMP_GEN_TASK_ID();
738 task->td_team = team;
739// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
740 task->td_ident = loc_ref;
741 task->td_taskwait_ident = NULL;
742 task->td_taskwait_counter = 0;
743 task->td_taskwait_thread = 0;
744
745 task->td_flags.tiedness = TASK_TIED;
746 task->td_flags.tasktype = TASK_IMPLICIT;
747 // All implicit tasks are executed immediately, not deferred
748 task->td_flags.task_serial = 1;
749 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
750 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
751
752 task->td_flags.started = 1;
753 task->td_flags.executing = 1;
754 task->td_flags.complete = 0;
755 task->td_flags.freed = 0;
756
757#if OMP_40_ENABLED
758 task->td_dephash = NULL;
759 task->td_depnode = NULL;
760#endif
761
762 if (set_curr_task) { // only do this initialization the first time a thread is created
763 task->td_incomplete_child_tasks = 0;
764 task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
765#if OMP_40_ENABLED
766 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
767#endif
768 __kmp_push_current_task_to_thread( this_thr, team, tid );
769 } else {
770 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
771 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
772 }
773
774 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
775 tid, team, task ) );
776}
777
778// Round up a size to a power of two specified by val
779// Used to insert padding between structures co-allocated using a single malloc() call
780static size_t
781__kmp_round_up_to_val( size_t size, size_t val ) {
782 if ( size & ( val - 1 ) ) {
783 size &= ~ ( val - 1 );
784 if ( size <= KMP_SIZE_T_MAX - val ) {
785 size += val; // Round up if there is no overflow.
786 }; // if
787 }; // if
788 return size;
789} // __kmp_round_up_to_val
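//----------------------------------------------------------------------------
// Illustration only (not compiled): a quick worked example of the rounding
// helper above, which is used to pad shareds_offset up to pointer alignment
// (val must be a power of two for the mask trick to work):
//     __kmp_round_up_to_val( 52, 8 ) == 56     // 52 & 7 != 0, so round up
//     __kmp_round_up_to_val( 56, 8 ) == 56     // already a multiple of 8
// A minimal equivalent without the overflow guard:
#if 0
#include <stddef.h>

static size_t toy_round_up( size_t size, size_t val )    /* val: power of two */
{
    return ( size + val - 1 ) & ~( val - 1 );
}
#endif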
790
791
792//---------------------------------------------------------------------------------
793// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
794//
795// loc_ref: source location information
796// gtid: global thread number.
797// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
798// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
799// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
800// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
801// task_entry: Pointer to task code entry point generated by compiler.
802// returns: a pointer to the allocated kmp_task_t structure (task).
803
804kmp_task_t *
805__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
806 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
807 kmp_routine_entry_t task_entry )
808{
809 kmp_task_t *task;
810 kmp_taskdata_t *taskdata;
811 kmp_info_t *thread = __kmp_threads[ gtid ];
812 kmp_team_t *team = thread->th.th_team;
813 kmp_taskdata_t *parent_task = thread->th.th_current_task;
814 size_t shareds_offset;
815
816 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
817 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
818 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
819 sizeof_shareds, task_entry) );
820
821 if ( parent_task->td_flags.final ) {
822 if (flags->merged_if0) {
823 }
824 flags->final = 1;
825 }
826
827 // Calculate shared structure offset including padding after kmp_task_t struct
828 // to align pointers in shared struct
829 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
830 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
831
832 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
833 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
834 gtid, shareds_offset) );
835 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
836 gtid, sizeof_shareds) );
837
838 // Avoid double allocation here by combining shareds with taskdata
839 #if USE_FAST_MEMORY
840 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
841 #else /* ! USE_FAST_MEMORY */
842 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
843 #endif /* USE_FAST_MEMORY */
844
845 task = KMP_TASKDATA_TO_TASK(taskdata);
846
847 // Make sure task & taskdata are aligned appropriately
848#if KMP_ARCH_X86
849 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
850 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
851#else
852 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
853 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
854#endif
855 if (sizeof_shareds > 0) {
856 // Avoid double allocation here by combining shareds with taskdata
857 task->shareds = & ((char *) taskdata)[ shareds_offset ];
858 // Make sure shareds struct is aligned to pointer size
859 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
860 } else {
861 task->shareds = NULL;
862 }
863 task->routine = task_entry;
864 task->part_id = 0; // AC: Always start with 0 part id
865
866 taskdata->td_task_id = KMP_GEN_TASK_ID();
867 taskdata->td_team = team;
868 taskdata->td_alloc_thread = thread;
869 taskdata->td_parent = parent_task;
870 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
871 taskdata->td_ident = loc_ref;
872 taskdata->td_taskwait_ident = NULL;
873 taskdata->td_taskwait_counter = 0;
874 taskdata->td_taskwait_thread = 0;
875 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
876 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
877
878 taskdata->td_flags.tiedness = flags->tiedness;
879 taskdata->td_flags.final = flags->final;
880 taskdata->td_flags.merged_if0 = flags->merged_if0;
881#if OMP_40_ENABLED
882 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
883#endif // OMP_40_ENABLED
884 taskdata->td_flags.tasktype = TASK_EXPLICIT;
885
886 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
887 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
888
889 // GEH - TODO: fix this to copy parent task's value of team_serial flag
890 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
891
892 // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
893 // tasks are not left until program termination to execute. Also, it helps locality to execute
894 // immediately.
895 taskdata->td_flags.task_serial = ( taskdata->td_flags.final
896 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
897
898 taskdata->td_flags.started = 0;
899 taskdata->td_flags.executing = 0;
900 taskdata->td_flags.complete = 0;
901 taskdata->td_flags.freed = 0;
902
903 taskdata->td_flags.native = flags->native;
904
905 taskdata->td_incomplete_child_tasks = 0;
906 taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
907#if OMP_40_ENABLED
908 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
909 taskdata->td_dephash = NULL;
910 taskdata->td_depnode = NULL;
911#endif
912 // Only need to keep track of child task counts if team parallel and tasking not serialized
913 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
914 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
915#if OMP_40_ENABLED
916 if ( parent_task->td_taskgroup )
917 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
918#endif
919 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
920 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
921 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
922 }
923 }
924
925 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
926 gtid, taskdata, taskdata->td_parent) );
927
928 return task;
929}
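//----------------------------------------------------------------------------
// Illustration only (not compiled): __kmp_task_alloc carves one allocation into
// a kmp_taskdata_t header, the kmp_task_t plus compiler-generated privates
// (sizeof_kmp_task_t bytes), padding to pointer alignment, and then the shareds
// block at shareds_offset.  The arithmetic below mirrors that layout with
// hypothetical sizes (the byte counts are assumptions for the example only):
#if 0
#include <stdio.h>
#include <stddef.h>

int main( void )
{
    size_t sizeof_taskdata   = 192;   /* stand-in for sizeof( kmp_taskdata_t ) */
    size_t sizeof_kmp_task_t = 44;    /* kmp_task_t + private variables        */
    size_t sizeof_shareds    = 24;    /* array of pointers to shared variables */

    size_t shareds_offset = sizeof_taskdata + sizeof_kmp_task_t;
    shareds_offset = ( shareds_offset + sizeof( void * ) - 1 ) & ~( sizeof( void * ) - 1 );

    printf( "one block of %zu bytes: taskdata at offset 0, task at %zu, shareds at %zu\n",
            shareds_offset + sizeof_shareds, sizeof_taskdata, shareds_offset );
    return 0;
}
#endif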
930
931
932kmp_task_t *
933__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
934 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
935 kmp_routine_entry_t task_entry )
936{
937 kmp_task_t *retval;
938 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
939
940 input_flags->native = FALSE;
941 // __kmp_task_alloc() sets up all other runtime flags
942
943 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
944 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
945 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
946 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
947
948 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
949 sizeof_shareds, task_entry );
950
951 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
952
953 return retval;
954}
955
956//-----------------------------------------------------------
957// __kmp_invoke_task: invoke the specified task
958//
959// gtid: global thread ID of caller
960// task: the task to invoke
961// current_task: the task to resume after task invocation
962
963static void
964__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
965{
966 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
967#if OMP_40_ENABLED
968 int discard = 0 /* false */;
969#endif
970 KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
971 gtid, taskdata, current_task) );
972
973 __kmp_task_start( gtid, task, current_task );
974
975#if OMP_40_ENABLED
976 // TODO: cancel tasks if the parallel region has also been cancelled
977 // TODO: check if this sequence can be hoisted above __kmp_task_start
978 // if cancellation has been enabled for this run ...
979 if (__kmp_omp_cancellation) {
980 kmp_info_t *this_thr = __kmp_threads [ gtid ];
981 kmp_team_t * this_team = this_thr->th.th_team;
982 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
983 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
984 // this task belongs to a task group and we need to cancel it
985 discard = 1 /* true */;
986 }
987 }
988
989 //
990 // Invoke the task routine and pass in relevant data.
991 // Thunks generated by gcc take a different argument list.
992 //
993 if (!discard) {
994#endif // OMP_40_ENABLED
995#ifdef KMP_GOMP_COMPAT
996 if (taskdata->td_flags.native) {
997 ((void (*)(void *))(*(task->routine)))(task->shareds);
998 }
999 else
1000#endif /* KMP_GOMP_COMPAT */
1001 {
1002 (*(task->routine))(gtid, task);
1003 }
1004#if OMP_40_ENABLED
1005 }
1006#endif // OMP_40_ENABLED
1007
1008 __kmp_task_finish( gtid, task, current_task );
1009
1010 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1011 gtid, taskdata, current_task) );
1012 return;
1013}
1014
1015//-----------------------------------------------------------------------
1016// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1017//
1018// loc_ref: location of original task pragma (ignored)
1019// gtid: Global Thread ID of encountering thread
1020// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1021// Returns:
1022// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1023// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1024
1025kmp_int32
1026__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1027{
1028 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1029
1030 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1031 gtid, loc_ref, new_taskdata ) );
1032
1033 /* Should we execute the new task or queue it? For now, let's just always try to
1034 queue it. If the queue fills up, then we'll execute it. */
1035
1036 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1037 { // Execute this task immediately
1038 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1039 new_taskdata->td_flags.task_serial = 1;
1040 __kmp_invoke_task( gtid, new_task, current_task );
1041 }
1042
1043 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1044 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1045 new_taskdata ) );
1046
1047 return TASK_CURRENT_NOT_QUEUED;
1048}
1049
1050
1051//---------------------------------------------------------------------
1052// __kmpc_omp_task: Schedule a non-thread-switchable task for execution
1053// loc_ref: location of original task pragma (ignored)
1054// gtid: Global Thread ID of encountering thread
1055// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1056// returns:
1057//
1058// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1059// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1060
1061kmp_int32
1062__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1063{
1064 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1065 kmp_int32 rc;
1066
1067 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1068 gtid, loc_ref, new_taskdata ) );
1069
1070 /* Should we execute the new task or queue it? For now, let's just always try to
1071 queue it. If the queue fills up, then we'll execute it. */
1072
1073 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1074 { // Execute this task immediately
1075 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1076 new_taskdata -> td_flags.task_serial = 1;
1077 __kmp_invoke_task( gtid, new_task, current_task );
1078 }
1079
1080 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1081 gtid, loc_ref, new_taskdata ) );
1082
1083 return TASK_CURRENT_NOT_QUEUED;
1084}
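//----------------------------------------------------------------------------
// Illustration only (not compiled): a hand-written approximation of what a
// compiler emits for "#pragma omp task shared(x)" using the two entry points
// above.  The flags value, shareds layout and helper names are assumptions for
// the example, not the documented ABI:
#if 0
typedef struct { int *x; } toy_shareds_t;

static kmp_int32 toy_task_entry( kmp_int32 gtid, kmp_task_t *task )
{
    toy_shareds_t *sh = (toy_shareds_t *) task->shareds;
    *sh->x += 1;                       /* body of the task region */
    return 0;
}

static void toy_spawn_task( ident_t *loc, kmp_int32 gtid, int *x )
{
    kmp_task_t *task = __kmpc_omp_task_alloc( loc, gtid, 1 /* tied (assumed) */,
                                              sizeof( kmp_task_t ), sizeof( toy_shareds_t ),
                                              (kmp_routine_entry_t) toy_task_entry );
    ( (toy_shareds_t *) task->shareds )->x = x;
    __kmpc_omp_task( loc, gtid, task );     /* deferred if possible, else run immediately */
}
#endif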
1085
1086
1087//-------------------------------------------------------------------------------------
1088// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1089
1090kmp_int32
1091__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1092{
1093 kmp_taskdata_t * taskdata;
1094 kmp_info_t * thread;
1095 int thread_finished = FALSE;
1096
1097 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1098 gtid, loc_ref) );
1099
1100 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1101 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1102
1103 thread = __kmp_threads[ gtid ];
1104 taskdata = thread -> th.th_current_task;
1105#if USE_ITT_BUILD
1106 // Note: These values are used by ITT events as well.
1107#endif /* USE_ITT_BUILD */
1108 taskdata->td_taskwait_counter += 1;
1109 taskdata->td_taskwait_ident = loc_ref;
1110 taskdata->td_taskwait_thread = gtid + 1;
1111
1112#if USE_ITT_BUILD
1113 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1114 if ( itt_sync_obj != NULL )
1115 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1116#endif /* USE_ITT_BUILD */
1117
1118 if ( ! taskdata->td_flags.team_serial ) {
1119 // GEH: if team serialized, avoid reading the volatile variable below.
1120 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1121 __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
1122 0, FALSE, &thread_finished
1123 USE_ITT_BUILD_ARG(itt_sync_obj),
1124 __kmp_task_stealing_constraint );
1125 }
1126 }
1127#if USE_ITT_BUILD
1128 if ( itt_sync_obj != NULL )
1129 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1130#endif /* USE_ITT_BUILD */
1131
1132 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1133 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1134 }
1135
1136 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1137 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1138
1139 return TASK_CURRENT_NOT_QUEUED;
1140}
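//----------------------------------------------------------------------------
// Illustration only (not compiled): a taskwait waits only for children of the
// current task, which is why the loop above spins on td_incomplete_child_tasks
// rather than on a team-wide counter.  User-level view (the lowering comment is
// an assumption about typical compiler output):
#if 0
void toy_user_code( int *x )
{
    #pragma omp task shared(x)
    *x += 1;                      /* child task: counted in td_incomplete_child_tasks */

    #pragma omp taskwait          /* lowered to a call to __kmpc_omp_taskwait()       */
}
#endif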
1141
1142
1143//-------------------------------------------------
1144// __kmpc_omp_taskyield: switch to a different task
1145
1146kmp_int32
1147__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1148{
1149 kmp_taskdata_t * taskdata;
1150 kmp_info_t * thread;
1151 int thread_finished = FALSE;
1152
1153 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1154 gtid, loc_ref, end_part) );
1155
1156 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1157 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1158
1159 thread = __kmp_threads[ gtid ];
1160 taskdata = thread -> th.th_current_task;
1161 // Should we model this as a task wait or not?
1162#if USE_ITT_BUILD
1163 // Note: These values are used by ITT events as well.
1164#endif /* USE_ITT_BUILD */
1165 taskdata->td_taskwait_counter += 1;
1166 taskdata->td_taskwait_ident = loc_ref;
1167 taskdata->td_taskwait_thread = gtid + 1;
1168
1169#if USE_ITT_BUILD
1170 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1171 if ( itt_sync_obj != NULL )
1172 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1173#endif /* USE_ITT_BUILD */
1174 if ( ! taskdata->td_flags.team_serial ) {
1175 __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished
1176 USE_ITT_BUILD_ARG(itt_sync_obj),
1177 __kmp_task_stealing_constraint );
1178 }
1179
1180#if USE_ITT_BUILD
1181 if ( itt_sync_obj != NULL )
1182 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1183#endif /* USE_ITT_BUILD */
1184
1185 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1186 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1187 }
1188
1189 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1190 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1191
1192 return TASK_CURRENT_NOT_QUEUED;
1193}
1194
1195
1196#if OMP_40_ENABLED
1197//-------------------------------------------------------------------------------------
1198// __kmpc_taskgroup: Start a new taskgroup
1199
1200void
1201__kmpc_taskgroup( ident_t* loc, int gtid )
1202{
1203 kmp_info_t * thread = __kmp_threads[ gtid ];
1204 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1205 kmp_taskgroup_t * tg_new =
1206 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1207 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1208 tg_new->count = 0;
1209 tg_new->cancel_request = cancel_noreq;
1210 tg_new->parent = taskdata->td_taskgroup;
1211 taskdata->td_taskgroup = tg_new;
1212}
1213
1214
1215//-------------------------------------------------------------------------------------
1216// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1217// and its descendants are complete
1218
1219void
1220__kmpc_end_taskgroup( ident_t* loc, int gtid )
1221{
1222 kmp_info_t * thread = __kmp_threads[ gtid ];
1223 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1224 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1225 int thread_finished = FALSE;
1226
1227 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1228 KMP_DEBUG_ASSERT( taskgroup != NULL );
1229
1230 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1231#if USE_ITT_BUILD
1232 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1233 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1234 if ( itt_sync_obj != NULL )
1235 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1236#endif /* USE_ITT_BUILD */
1237
1238 if ( ! taskdata->td_flags.team_serial ) {
1239 while ( TCR_4(taskgroup->count) != 0 ) {
1240 __kmp_execute_tasks( thread, gtid, &(taskgroup->count),
1241 0, FALSE, &thread_finished
1242 USE_ITT_BUILD_ARG(itt_sync_obj),
1243 __kmp_task_stealing_constraint );
1244 }
1245 }
1246
1247#if USE_ITT_BUILD
1248 if ( itt_sync_obj != NULL )
1249 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1250#endif /* USE_ITT_BUILD */
1251 }
1252 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1253
1254 // Restore parent taskgroup for the current task
1255 taskdata->td_taskgroup = taskgroup->parent;
1256 __kmp_thread_free( thread, taskgroup );
1257
1258 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1259}
1260#endif
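//----------------------------------------------------------------------------
// Illustration only (not compiled): nested taskgroups chain through tg->parent,
// so each __kmpc_end_taskgroup waits on its own group's count and then restores
// the enclosing group.  User-level view with hypothetical work functions:
#if 0
void toy_nested_taskgroups( void )
{
    #pragma omp taskgroup                 /* __kmpc_taskgroup()                          */
    {
        #pragma omp task
        outer_work();                     /* hypothetical function                       */

        #pragma omp taskgroup             /* nested group; its parent is the outer group */
        {
            #pragma omp task
            inner_work();                 /* hypothetical function                       */
        }                                 /* waits for inner_work() and its descendants  */
    }                                     /* waits for outer_work() as well              */
}
#endif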
1261
1262
1263//------------------------------------------------------
1264// __kmp_remove_my_task: remove a task from my own deque
1265
1266static kmp_task_t *
1267__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1268 kmp_int32 is_constrained )
1269{
1270 kmp_task_t * task;
1271 kmp_taskdata_t * taskdata;
1272 kmp_thread_data_t *thread_data;
1273 kmp_uint32 tail;
1274
1275 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1276 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1277
1278 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1279
1280 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1281 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1282 thread_data->td.td_deque_tail) );
1283
1284 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1285 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1286 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1287 thread_data->td.td_deque_tail) );
1288 return NULL;
1289 }
1290
1291 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1292
1293 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1294 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1295 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1296 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1297 thread_data->td.td_deque_tail) );
1298 return NULL;
1299 }
1300
1301 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1302 taskdata = thread_data -> td.td_deque[ tail ];
1303
1304 if (is_constrained) {
1305 // we need to check if the candidate obeys task scheduling constraint:
1306 // only child of current task can be scheduled
1307 kmp_taskdata_t * current = thread->th.th_current_task;
1308 kmp_int32 level = current->td_level;
1309 kmp_taskdata_t * parent = taskdata->td_parent;
1310 while ( parent != current && parent->td_level > level ) {
1311 parent = parent->td_parent; // check generation up to the level of the current task
1312 KMP_DEBUG_ASSERT(parent != NULL);
1313 }
1314 if ( parent != current ) {
1315 // If the tail task is not a child, then no other children can appear in the deque.
1316 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1317 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1318 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1319 thread_data->td.td_deque_tail) );
1320 return NULL;
1321 }
1322 }
1323
1324 thread_data -> td.td_deque_tail = tail;
1325 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1326
1327 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1328
1329 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1330 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1331 thread_data->td.td_deque_tail) );
1332
1333 task = KMP_TASKDATA_TO_TASK( taskdata );
1334 return task;
1335}
1336
1337
1338//-----------------------------------------------------------
1339// __kmp_steal_task: remove a task from another thread's deque
1340// Assume that calling thread has already checked existence of
1341// task_team thread_data before calling this routine.
1342
1343static kmp_task_t *
1344__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1345 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1346 kmp_int32 is_constrained )
1347{
1348 kmp_task_t * task;
1349 kmp_taskdata_t * taskdata;
1350 kmp_thread_data_t *victim_td, *threads_data;
1351 kmp_int32 victim_tid, thread_tid;
1352
1353 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1354
1355 threads_data = task_team -> tt.tt_threads_data;
1356 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1357
1358 victim_tid = victim->th.th_info.ds.ds_tid;
1359 victim_td = & threads_data[ victim_tid ];
1360
1361 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1362 "head=%u tail=%u\n",
1363 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1364 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1365
1366 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1367 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1368 {
1369 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1370 "ntasks=%d head=%u tail=%u\n",
1371 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1372 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1373 return NULL;
1374 }
1375
1376 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1377
1378 // Check again after we acquire the lock
1379 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1380 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1381 {
1382 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1383 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1384 "ntasks=%d head=%u tail=%u\n",
1385 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1386 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1387 return NULL;
1388 }
1389
1390 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1391
1392 if ( !is_constrained ) {
1393 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1394 // Bump head pointer and Wrap.
1395 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1396 } else {
1397        // While there are postponed tasks, steal from the tail of the deque (smaller tasks)
1398 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1399 taskdata = victim_td -> td.td_deque[ tail ];
1400 // we need to check if the candidate obeys task scheduling constraint:
1401 // only child of current task can be scheduled
1402 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1403 kmp_int32 level = current->td_level;
1404 kmp_taskdata_t * parent = taskdata->td_parent;
1405 while ( parent != current && parent->td_level > level ) {
1406 parent = parent->td_parent; // check generation up to the level of the current task
1407 KMP_DEBUG_ASSERT(parent != NULL);
1408 }
1409 if ( parent != current ) {
1410        // If the tail task is not a child, then no other children can appear in the deque (?).
1411 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1412 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1413 "ntasks=%d head=%u tail=%u\n",
1414 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1415 task_team, victim_td->td.td_deque_ntasks,
1416 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1417 return NULL;
1418 }
1419 victim_td -> td.td_deque_tail = tail;
1420 }
1421 if (*thread_finished) {
1422 // We need to un-mark this victim as a finished victim. This must be done before
1423 // releasing the lock, or else other threads (starting with the master victim)
1424 // might be prematurely released from the barrier!!!
1425 kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1426
1427 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1428 gtid, count + 1, task_team) );
1429
1430 *thread_finished = FALSE;
1431 }
1432 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1433
1434 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1435
1436    KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
1437 "ntasks=%d head=%u tail=%u\n",
1438 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1439 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1440 victim_td->td.td_deque_tail) );
1441
1442 task = KMP_TASKDATA_TO_TASK( taskdata );
1443 return task;
1444}
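
//
// Illustrative sketch (not compiled): the task scheduling constraint check
// used on the is_constrained paths above. A candidate task may only be
// scheduled if the thief's current task is one of its ancestors; we climb the
// candidate's parent chain, but only while the ancestors are nested more
// deeply than the current task. The struct below is a hypothetical
// simplification of kmp_taskdata_t (td_parent / td_level), and assumes the
// chain always reaches the implicit root task.
//
#if 0
typedef struct ex_task {
    struct ex_task *parent;        // td_parent
    int             level;         // td_level: nesting depth, 0 at the root
} ex_task_t;

static int ex_obeys_tsc( ex_task_t *candidate, ex_task_t *current )
{
    ex_task_t *p = candidate->parent;
    // Ancestors nested deeper than 'current' cannot be 'current' itself,
    // so keep climbing; stop once we reach current's level (or current).
    while ( p != current && p->level > current->level ) {
        p = p->parent;
    }
    return ( p == current );       // TRUE only if current is an ancestor
}
#endif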
1445
1446
1447//-----------------------------------------------------------------------------
1448// __kmp_execute_tasks: Choose and execute tasks until either the condition
1449// is satisfied (return true) or there are none left (return false).
1450// final_spin is TRUE if this is the spin at the release barrier.
1451// thread_finished indicates whether the thread is finished executing all
1452// the tasks it has on its deque, and is at the release barrier.
1453// spinner is the location on which to spin.
1454// spinner == NULL means only execute a single task and return.
1455// checker is the value to check to terminate the spin.
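// The search order below is: (1) this thread's own deque, (2) the victim this
// thread last stole from successfully, (3) randomly chosen other threads.
// Whenever a stolen task puts new work on this thread's own deque, the search
// restarts at (1).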
1456
1457int
1458__kmp_execute_tasks( kmp_info_t *thread,
1459 kmp_int32 gtid,
1460 volatile kmp_uint *spinner,
1461 kmp_uint checker,
1462 int final_spin,
1463 int *thread_finished
1464 USE_ITT_BUILD_ARG(void * itt_sync_obj),
1465 kmp_int32 is_constrained )
1466{
1467 kmp_task_team_t * task_team;
1468 kmp_team_t * team;
1469 kmp_thread_data_t * threads_data;
1470 kmp_task_t * task;
1471 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1472 volatile kmp_uint32 * unfinished_threads;
1473 kmp_int32 nthreads, last_stolen, k, tid;
1474
1475 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1476 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1477
1478 task_team = thread -> th.th_task_team;
1479 KMP_DEBUG_ASSERT( task_team != NULL );
1480
1481 KA_TRACE(15, ("__kmp_execute_tasks(enter): T#%d final_spin=%d *thread_finished=%d\n",
1482 gtid, final_spin, *thread_finished) );
1483
1484 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1485 KMP_DEBUG_ASSERT( threads_data != NULL );
1486
1487 nthreads = task_team -> tt.tt_nproc;
1488 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1489 KMP_DEBUG_ASSERT( nthreads > 1 );
1490 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1491
1492 // Choose tasks from our own work queue.
1493 start:
1494 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1495#if USE_ITT_BUILD && USE_ITT_NOTIFY
1496 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1497 if ( itt_sync_obj == NULL ) {
1498 // we are at fork barrier where we could not get the object reliably
1499 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1500 }
1501 __kmp_itt_task_starting( itt_sync_obj );
1502 }
1503#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1504 __kmp_invoke_task( gtid, task, current_task );
1505#if USE_ITT_BUILD
1506 if ( itt_sync_obj != NULL )
1507 __kmp_itt_task_finished( itt_sync_obj );
1508#endif /* USE_ITT_BUILD */
1509
1510 // If this thread is only partway through the barrier and the condition
1511 // is met, then return now, so that the barrier gather/release pattern can proceed.
1512 // If this thread is in the last spin loop in the barrier, waiting to be
1513        // released, we know that the termination condition will not be satisfied,
1514 // so don't waste any cycles checking it.
1515 if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1516 KA_TRACE(15, ("__kmp_execute_tasks(exit #1): T#%d spin condition satisfied\n", gtid) );
1517 return TRUE;
1518 }
1519 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1520 }
1521
1522 // This thread's work queue is empty. If we are in the final spin loop
1523 // of the barrier, check and see if the termination condition is satisfied.
1524 if (final_spin) {
1525 // First, decrement the #unfinished threads, if that has not already
1526 // been done. This decrement might be to the spin location, and
1527 // result in the termination condition being satisfied.
1528 if (! *thread_finished) {
1529 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1530 KA_TRACE(20, ("__kmp_execute_tasks(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1531 gtid, count, task_team) );
1532 *thread_finished = TRUE;
1533 }
1534
1535 // It is now unsafe to reference thread->th.th_team !!!
1536 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1537 // thread to pass through the barrier, where it might reset each thread's
1538 // th.th_team field for the next parallel region.
1539 // If we can steal more work, we know that this has not happened yet.
1540 if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1541 KA_TRACE(15, ("__kmp_execute_tasks(exit #2): T#%d spin condition satisfied\n", gtid) );
1542 return TRUE;
1543 }
1544 }
1545
1546 // Try to steal from the last place I stole from successfully.
1547 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1548 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1549
1550 if (last_stolen != -1) {
1551 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1552
1553 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1554 thread_finished, is_constrained )) != NULL)
1555 {
1556#if USE_ITT_BUILD && USE_ITT_NOTIFY
1557 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1558 if ( itt_sync_obj == NULL ) {
1559 // we are at fork barrier where we could not get the object reliably
1560 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1561 }
1562 __kmp_itt_task_starting( itt_sync_obj );
1563 }
1564#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1565 __kmp_invoke_task( gtid, task, current_task );
1566#if USE_ITT_BUILD
1567 if ( itt_sync_obj != NULL )
1568 __kmp_itt_task_finished( itt_sync_obj );
1569#endif /* USE_ITT_BUILD */
1570
1571 // Check to see if this thread can proceed.
1572 if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1573 KA_TRACE(15, ("__kmp_execute_tasks(exit #3): T#%d spin condition satisfied\n",
1574 gtid) );
1575 return TRUE;
1576 }
1577
1578 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1579 // If the execution of the stolen task resulted in more tasks being
1580 // placed on our run queue, then restart the whole process.
1581 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1582 KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1583 gtid) );
1584 goto start;
1585 }
1586 }
1587
1588 // Don't give priority to stealing from this thread anymore.
1589 threads_data[ tid ].td.td_deque_last_stolen = -1;
1590
1591        // The victim's work queue is empty.  If we are in the final spin loop
1592 // of the barrier, check and see if the termination condition is satisfied.
1593 if (final_spin) {
1594 // First, decrement the #unfinished threads, if that has not already
1595 // been done. This decrement might be to the spin location, and
1596 // result in the termination condition being satisfied.
1597 if (! *thread_finished) {
1598 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1599 KA_TRACE(20, ("__kmp_execute_tasks(dec #2): T#%d dec unfinished_threads to %d "
1600 "task_team=%p\n", gtid, count, task_team) );
1601 *thread_finished = TRUE;
1602 }
1603
1604 // If __kmp_tasking_mode != tskm_immediate_exec
1605 // then it is now unsafe to reference thread->th.th_team !!!
1606 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1607 // thread to pass through the barrier, where it might reset each thread's
1608 // th.th_team field for the next parallel region.
1609 // If we can steal more work, we know that this has not happened yet.
1610 if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1611 KA_TRACE(15, ("__kmp_execute_tasks(exit #4): T#%d spin condition satisfied\n",
1612 gtid) );
1613 return TRUE;
1614 }
1615 }
1616 }
1617
1618 // Find a different thread to steal work from. Pick a random thread.
1619 // My initial plan was to cycle through all the threads, and only return
1620 // if we tried to steal from every thread, and failed. Arch says that's
1621 // not such a great idea.
1622 // GEH - need yield code in this loop for throughput library mode?
1623 new_victim:
1624 k = __kmp_get_random( thread ) % (nthreads - 1);
1625 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1626 ++k; // Adjusts random distribution to exclude self
1627 }
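    // Example: with nthreads == 4 and this thread's tid == 2, k is drawn
    // uniformly from {0, 1, 2}; values >= 2 are shifted up by one, so the
    // victim tid is uniform over {0, 1, 3}, i.e. every thread but ourselves.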
1628 {
1629 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1630 int first;
1631
1632 // There is a slight chance that __kmp_enable_tasking() did not wake up
1633 // all threads waiting at the barrier. If this thread is sleeping, then
1634        // wake it up. Since we were going to pay the cache miss penalty
1635        // for referencing another thread's kmp_info_t struct anyway, the check
1636 // shouldn't cost too much performance at this point.
1637 // In extra barrier mode, tasks do not sleep at the separate tasking
1638 // barrier, so this isn't a problem.
1639 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1640 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1641 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1642 {
1643 __kmp_resume( __kmp_gtid_from_thread( other_thread ), NULL );
1644
1645            // A sleeping thread should not have any tasks on its queue.
1646            // There is a slight possibility that it resumes, steals a task from
1647            // another thread, which spawns more tasks, all in the time that it takes
1648 // this thread to check => don't write an assertion that the victim's
1649 // queue is empty. Try stealing from a different thread.
1650 goto new_victim;
1651 }
1652
1653 // Now try to steal work from the selected thread
1654 first = TRUE;
1655 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1656 thread_finished, is_constrained )) != NULL)
1657 {
1658#if USE_ITT_BUILD && USE_ITT_NOTIFY
1659 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1660 if ( itt_sync_obj == NULL ) {
1661 // we are at fork barrier where we could not get the object reliably
1662 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1663 }
1664 __kmp_itt_task_starting( itt_sync_obj );
1665 }
1666#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1667 __kmp_invoke_task( gtid, task, current_task );
1668#if USE_ITT_BUILD
1669 if ( itt_sync_obj != NULL )
1670 __kmp_itt_task_finished( itt_sync_obj );
1671#endif /* USE_ITT_BUILD */
1672
1673 // Try stealing from this victim again, in the future.
1674 if (first) {
1675 threads_data[ tid ].td.td_deque_last_stolen = k;
1676 first = FALSE;
1677 }
1678
1679 // Check to see if this thread can proceed.
1680 if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1681 KA_TRACE(15, ("__kmp_execute_tasks(exit #5): T#%d spin condition satisfied\n",
1682 gtid) );
1683 return TRUE;
1684 }
1685 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1686
1687 // If the execution of the stolen task resulted in more tasks being
1688 // placed on our run queue, then restart the whole process.
1689 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1690 KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1691 gtid) );
1692 goto start;
1693 }
1694 }
1695
1696        // The victim's work queue is empty.  If we are in the final spin loop
1697 // of the barrier, check and see if the termination condition is satisfied.
1698 // Going on and finding a new victim to steal from is expensive, as it
1699 // involves a lot of cache misses, so we definitely want to re-check the
1700 // termination condition before doing that.
1701 if (final_spin) {
1702 // First, decrement the #unfinished threads, if that has not already
1703 // been done. This decrement might be to the spin location, and
1704 // result in the termination condition being satisfied.
1705 if (! *thread_finished) {
1706 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1707 KA_TRACE(20, ("__kmp_execute_tasks(dec #3): T#%d dec unfinished_threads to %d; "
1708 "task_team=%p\n",
1709 gtid, count, task_team) );
1710 *thread_finished = TRUE;
1711 }
1712
1713 // If __kmp_tasking_mode != tskm_immediate_exec,
1714 // then it is now unsafe to reference thread->th.th_team !!!
1715 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1716 // thread to pass through the barrier, where it might reset each thread's
1717 // th.th_team field for the next parallel region.
1718 // If we can steal more work, we know that this has not happened yet.
1719 if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1720 KA_TRACE(15, ("__kmp_execute_tasks(exit #6): T#%d spin condition satisfied\n",
1721 gtid) );
1722 return TRUE;
1723 }
1724 }
1725 }
1726
1727 KA_TRACE(15, ("__kmp_execute_tasks(exit #7): T#%d can't find work\n", gtid) );
1728 return FALSE;
1729}
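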
1730
1731
1732//-----------------------------------------------------------------------------
1733// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1734// next barrier so they can assist in executing enqueued tasks.
1735// First thread in allocates the task team atomically.
1736
1737static void
1738__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1739{
1740 kmp_team_t *team = this_thr->th.th_team;
1741 kmp_thread_data_t *threads_data;
1742 int nthreads, i, is_init_thread;
1743
1744 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1745 __kmp_gtid_from_thread( this_thr ) ) );
1746
1747 KMP_DEBUG_ASSERT(task_team != NULL);
1748 KMP_DEBUG_ASSERT(team != NULL);
1749
1750 nthreads = task_team->tt.tt_nproc;
1751 KMP_DEBUG_ASSERT(nthreads > 0);
1752 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1753
1754 // Allocate or increase the size of threads_data if necessary
1755 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1756
1757 if (!is_init_thread) {
1758 // Some other thread already set up the array.
1759 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1760 __kmp_gtid_from_thread( this_thr ) ) );
1761 return;
1762 }
1763 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1764 KMP_DEBUG_ASSERT( threads_data != NULL );
1765
1766 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1767 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1768 {
1769 // Release any threads sleeping at the barrier, so that they can steal
1770 // tasks and execute them. In extra barrier mode, tasks do not sleep
1771 // at the separate tasking barrier, so this isn't a problem.
1772 for (i = 0; i < nthreads; i++) {
1773 volatile kmp_uint *sleep_loc;
1774 kmp_info_t *thread = threads_data[i].td.td_thr;
1775
1776 if (i == this_thr->th.th_info.ds.ds_tid) {
1777 continue;
1778 }
1779 // Since we haven't locked the thread's suspend mutex lock at this
1780 // point, there is a small window where a thread might be putting
1781 // itself to sleep, but hasn't set the th_sleep_loc field yet.
1782 // To work around this, __kmp_execute_tasks() periodically checks
1783 // see if other threads are sleeping (using the same random
1784 // mechanism that is used for task stealing) and awakens them if
1785 // they are.
1786 if ( ( sleep_loc = (volatile kmp_uint *)
1787 TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1788 {
1789 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1790 __kmp_gtid_from_thread( this_thr ),
1791 __kmp_gtid_from_thread( thread ) ) );
1792 __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
1793 }
1794 else {
1795 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1796 __kmp_gtid_from_thread( this_thr ),
1797 __kmp_gtid_from_thread( thread ) ) );
1798 }
1799 }
1800 }
1801
1802 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1803 __kmp_gtid_from_thread( this_thr ) ) );
1804}
1805
1806
1807/* ------------------------------------------------------------------------ */
1808/*
1809 * Utility routines for "task teams".  A task team (kmp_task_team_t) is kind of
1810 * like a shadow of the kmp_team_t data struct, with a different lifetime.
1811 * After a child thread checks into a barrier and calls __kmp_release() from
1812 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1813 * longer assume that the kmp_team_t structure is intact (at any moment, the
1814 * master thread may exit the barrier code and free the team data structure,
1815 * and return the threads to the thread pool).
1816 *
1817 * This does not work with the tasking code, as the thread is still
1818 * expected to participate in the execution of any tasks that may have been
1819 * spawned by a member of the team, and the thread still needs access to
1820 * each thread in the team, so that it can steal work from it.
1821 *
1822 * Enter the existence of the kmp_task_team_t struct. It employs a reference
1823 * counting mechanism, and is allocated by the master thread before calling
1824 * __kmp_<barrier_kind>_release, and then is released by the last thread to
1825 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1826 * of the kmp_task_team_t structs for consecutive barriers can overlap
1827 * (and will, unless the master thread is the last thread to exit the barrier
1828 * release phase, which is not typical).
1829 *
1830 * The existence of such a struct is useful outside the context of tasking,
1831 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1832 * so that any performance differences show up when comparing the 2.5 vs. 3.0
1833 * libraries.
1834 *
1835 * We currently use the existence of the threads array as an indicator that
1836 * tasks were spawned since the last barrier. If the structure is to be
1837 * useful outside the context of tasking, then this will have to change, but
1838 * not setting the field minimizes the performance impact of tasking on
1839 * barriers, when no explicit tasks were spawned (pushed, actually).
1840 */
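
/*
 * Illustrative sketch (not compiled) of the reference counting scheme
 * described above, using plain C.  The master allocates the struct with a
 * count of nthreads - 1 (it does not count itself); each worker decrements
 * the count when it is finished with the struct, and whichever worker brings
 * it to zero frees it.  The real code uses an atomic fetch-and-decrement and
 * recycles the struct on a free list instead of calling free(); the names
 * below (ex_task_team_t, ex_unref_task_team, ...) are hypothetical.
 */
#if 0
#include <stdlib.h>

typedef struct {
    int ref_ct;                     /* like tt_ref_ct: #workers still using this */
    /* ... task team payload ... */
} ex_task_team_t;

static ex_task_team_t *ex_alloc_task_team( int nthreads )   /* master only */
{
    ex_task_team_t *tt = (ex_task_team_t *) malloc( sizeof( *tt ) );
    tt->ref_ct = nthreads - 1;      /* master is not included in the count */
    return tt;
}

static void ex_unref_task_team( ex_task_team_t *tt )        /* each worker */
{
    if ( --tt->ref_ct == 0 )        /* must be atomic in the real code */
        free( tt );                 /* last worker out releases the struct */
}
#endif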
1841
1842static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1843// Lock for task team data structures
1844static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1845
1846
1847//------------------------------------------------------------------------------
1848// __kmp_alloc_task_deque:
1849// Allocates a task deque for a particular thread, and initializes the necessary
1850// data structures relating to the deque. This only happens once per thread
1851// per task team since task teams are recycled.
1852// No lock is needed during allocation since each thread allocates its own
1853// deque.
1854
1855static void
1856__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1857{
1858 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1859 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1860
1861 // Initialize last stolen task field to "none"
1862 thread_data -> td.td_deque_last_stolen = -1;
1863
1864 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1865 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1866 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1867
1868 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1869 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1870 // Allocate space for task deque, and zero the deque
1871 // Cannot use __kmp_thread_calloc() because threads not around for
1872 // kmp_reap_task_team( ).
1873 thread_data -> td.td_deque = (kmp_taskdata_t **)
1874 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1875}
1876
1877
1878//------------------------------------------------------------------------------
1879// __kmp_free_task_deque:
1880// Deallocates a task deque for a particular thread.
1881// Happens at library deallocation so don't need to reset all thread data fields.
1882
1883static void
1884__kmp_free_task_deque( kmp_thread_data_t *thread_data )
1885{
1886 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1887
1888 if ( thread_data -> td.td_deque != NULL ) {
1889 TCW_4(thread_data -> td.td_deque_ntasks, 0);
1890 __kmp_free( thread_data -> td.td_deque );
1891 thread_data -> td.td_deque = NULL;
1892 }
1893 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1894
1895#ifdef BUILD_TIED_TASK_STACK
1896 // GEH: Figure out what to do here for td_susp_tied_tasks
1897 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1898 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1899 }
1900#endif // BUILD_TIED_TASK_STACK
1901}
1902
1903
1904//------------------------------------------------------------------------------
1905// __kmp_realloc_task_threads_data:
1906// Allocates a threads_data array for a task team, either by allocating an initial
1907// array or enlarging an existing array. Only the first thread to get the lock
1908// allocates or enlarges the array and re-initializes the array elements.
1909// That thread returns "TRUE", the rest return "FALSE".
1910// Assumes that the new array size is given by task_team -> tt.tt_nproc.
1911// The current size is given by task_team -> tt.tt_max_threads.
1912
1913static int
1914__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1915{
1916 kmp_thread_data_t ** threads_data_p;
1917 kmp_int32 nthreads, maxthreads;
1918 int is_init_thread = FALSE;
1919
1920 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1921 // Already reallocated and initialized.
1922 return FALSE;
1923 }
1924
1925 threads_data_p = & task_team -> tt.tt_threads_data;
1926 nthreads = task_team -> tt.tt_nproc;
1927 maxthreads = task_team -> tt.tt_max_threads;
1928
1929 // All threads must lock when they encounter the first task of the implicit task
1930    // region to make sure threads_data fields are (re)initialized before they are used.
1931 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1932
1933 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1934 // first thread to enable tasking
1935 kmp_team_t *team = thread -> th.th_team;
1936 int i;
1937
1938 is_init_thread = TRUE;
1939 if ( maxthreads < nthreads ) {
1940
1941 if ( *threads_data_p != NULL ) {
1942 kmp_thread_data_t *old_data = *threads_data_p;
1943 kmp_thread_data_t *new_data = NULL;
1944
1945 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1946 "threads data for task_team %p, new_size = %d, old_size = %d\n",
1947 __kmp_gtid_from_thread( thread ), task_team,
1948 nthreads, maxthreads ) );
1949 // Reallocate threads_data to have more elements than current array
1950 // Cannot use __kmp_thread_realloc() because threads not around for
1951 // kmp_reap_task_team( ). Note all new array entries are initialized
1952 // to zero by __kmp_allocate().
1953 new_data = (kmp_thread_data_t *)
1954 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1955 // copy old data to new data
1956 memcpy( (void *) new_data, (void *) old_data,
1957                         maxthreads * sizeof(kmp_thread_data_t) );
1958
1959#ifdef BUILD_TIED_TASK_STACK
1960 // GEH: Figure out if this is the right thing to do
1961 for (i = maxthreads; i < nthreads; i++) {
1962 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1963 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1964 }
1965#endif // BUILD_TIED_TASK_STACK
1966 // Install the new data and free the old data
1967 (*threads_data_p) = new_data;
1968 __kmp_free( old_data );
1969 }
1970 else {
1971 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
1972 "threads data for task_team %p, size = %d\n",
1973 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
1974 // Make the initial allocate for threads_data array, and zero entries
1975 // Cannot use __kmp_thread_calloc() because threads not around for
1976 // kmp_reap_task_team( ).
1977 *threads_data_p = (kmp_thread_data_t *)
1978 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1979#ifdef BUILD_TIED_TASK_STACK
1980 // GEH: Figure out if this is the right thing to do
1981 for (i = 0; i < nthreads; i++) {
1982 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1983 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1984 }
1985#endif // BUILD_TIED_TASK_STACK
1986 }
1987 task_team -> tt.tt_max_threads = nthreads;
1988 }
1989 else {
1990 // If array has (more than) enough elements, go ahead and use it
1991 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
1992 }
1993
1994 // initialize threads_data pointers back to thread_info structures
1995 for (i = 0; i < nthreads; i++) {
1996 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1997 thread_data -> td.td_thr = team -> t.t_threads[i];
1998
1999 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2000 // The last stolen field survives across teams / barrier, and the number
2001 // of threads may have changed. It's possible (likely?) that a new
2002 // parallel region will exhibit the same behavior as the previous region.
2003 thread_data -> td.td_deque_last_stolen = -1;
2004 }
2005 }
2006
2007 KMP_MB();
2008 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2009 }
2010
2011 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2012 return is_init_thread;
2013}
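
//
// Illustrative sketch (not compiled): the grow-and-copy pattern used by
// __kmp_realloc_task_threads_data above, with plain C.  The copy size must be
// expressed in units of the array's element type (here ex_elem_t), and the
// new storage is zero-initialized so the freshly added tail entries start out
// clean.  All names below are hypothetical.
//
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct { int payload; } ex_elem_t;

static ex_elem_t *ex_grow_array( ex_elem_t *old_data, int old_size, int new_size )
{
    // calloc zeroes the new entries, like __kmp_allocate does above
    ex_elem_t *new_data = (ex_elem_t *) calloc( new_size, sizeof( ex_elem_t ) );
    if ( old_data != NULL ) {
        memcpy( new_data, old_data, old_size * sizeof( ex_elem_t ) );
        free( old_data );           // install-then-free, as above
    }
    return new_data;
}
#endif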
2014
2015
2016//------------------------------------------------------------------------------
2017// __kmp_free_task_threads_data:
2018// Deallocates a threads_data array for a task team, including any attached
2019// tasking deques. Only occurs at library shutdown.
2020
2021static void
2022__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2023{
2024 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2025 if ( task_team -> tt.tt_threads_data != NULL ) {
2026 int i;
2027 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2028 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2029 }
2030 __kmp_free( task_team -> tt.tt_threads_data );
2031 task_team -> tt.tt_threads_data = NULL;
2032 }
2033 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2034}
2035
2036
2037//------------------------------------------------------------------------------
2038// __kmp_allocate_task_team:
2039// Allocates a task team associated with a specific team, taking it from
2040// the global task team free list if possible. Also initializes data structures.
2041
2042static kmp_task_team_t *
2043__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2044{
2045 kmp_task_team_t *task_team = NULL;
2046 int nthreads;
2047
2048 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2049 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2050
2051 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2052 // Take a task team from the task team pool
2053 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2054 if (__kmp_free_task_teams != NULL) {
2055 task_team = __kmp_free_task_teams;
2056 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2057 task_team -> tt.tt_next = NULL;
2058 }
2059 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2060 }
2061
2062 if (task_team == NULL) {
2063 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2064 "task team for team %p\n",
2065 __kmp_gtid_from_thread( thread ), team ) );
2066 // Allocate a new task team if one is not available.
2067 // Cannot use __kmp_thread_malloc() because threads not around for
2068 // kmp_reap_task_team( ).
2069 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2070 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2071 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2072 //task_team -> tt.tt_max_threads = 0;
2073 //task_team -> tt.tt_next = NULL;
2074 }
2075
2076 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2077 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2078
2079 task_team -> tt.tt_state = 0;
2080 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2081 TCW_4( task_team -> tt.tt_active, TRUE );
2082 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2083
2084 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2085 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2086 return task_team;
2087}
2088
2089
2090//------------------------------------------------------------------------------
2091// __kmp_free_task_team:
2092// Frees the task team associated with a specific thread, and adds it
2093// to the global task team free list.
2094//
2095
2096static void
2097__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2098{
2099 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2100 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2101
2102 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2103
2104 // Put task team back on free list
2105 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2106
2107 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2108 task_team -> tt.tt_next = __kmp_free_task_teams;
2109 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2110 TCW_PTR(__kmp_free_task_teams, task_team);
2111
2112 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2113}
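
//
// Illustrative sketch (not compiled): the lock-protected singly linked free
// list that __kmp_allocate_task_team and __kmp_free_task_team maintain above.
// Recycling the structs avoids an allocator round trip at every barrier.
// Names below (ex_node_t, ex_push, ex_pop) are hypothetical; in the real code
// the callers hold __kmp_task_team_lock around these operations.
//
#if 0
typedef struct ex_node {
    struct ex_node *next;             // like tt_next
    /* ... payload ... */
} ex_node_t;

static ex_node_t *ex_free_list = NULL;

static void ex_push( ex_node_t *n )   // called with the list lock held
{
    n->next = ex_free_list;
    ex_free_list = n;
}

static ex_node_t *ex_pop( void )      // called with the list lock held
{
    ex_node_t *n = ex_free_list;
    if ( n != NULL ) {
        ex_free_list = n->next;
        n->next = NULL;
    }
    return n;
}
#endif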
2114
2115
2116//------------------------------------------------------------------------------
2117// __kmp_reap_task_teams:
2118// Free all the task teams on the task team free list.
2119// Should only be done during library shutdown.
2120// Cannot do anything that needs a thread structure or gtid since they are already gone.
2121
2122void
2123__kmp_reap_task_teams( void )
2124{
2125 kmp_task_team_t *task_team;
2126
2127 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2128 // Free all task_teams on the free list
2129 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2130 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2131 __kmp_free_task_teams = task_team -> tt.tt_next;
2132 task_team -> tt.tt_next = NULL;
2133
2134 // Free threads_data if necessary
2135 if ( task_team -> tt.tt_threads_data != NULL ) {
2136 __kmp_free_task_threads_data( task_team );
2137 }
2138 __kmp_free( task_team );
2139 }
2140 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2141 }
2142}
2143
2144
2145//------------------------------------------------------------------------------
2146// __kmp_unref_task_teams:
2147// Remove one thread from referencing the task team structure by
2148// decreasing the reference count and deallocate task team if no more
2149// references to it.
2150//
2151void
2152__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2153{
2154 kmp_uint ref_ct;
2155
2156 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2157
2158 KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2159 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2160
2161
2162 if ( ref_ct == 0 ) {
2163 __kmp_free_task_team( thread, task_team );
2164 }
2165
2166 TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2167}
2168
2169
2170//------------------------------------------------------------------------------
2171// __kmp_wait_to_unref_task_teams:
2172// Some threads could still be in the fork barrier release code, possibly
2173// trying to steal tasks. Wait for each thread to unreference its task team.
2174//
2175void
2176__kmp_wait_to_unref_task_teams(void)
2177{
2178 kmp_info_t *thread;
2179 kmp_uint32 spins;
2180 int done;
2181
2182 KMP_INIT_YIELD( spins );
2183
2184
2185 for (;;) {
2186 done = TRUE;
2187
2188        // TODO: GEH - this may be wrong because some sync would be necessary
2189 // in case threads are added to the pool during the traversal.
2190 // Need to verify that lock for thread pool is held when calling
2191 // this routine.
2192 for (thread = (kmp_info_t *)__kmp_thread_pool;
2193 thread != NULL;
2194 thread = thread->th.th_next_pool)
2195 {
2196 volatile kmp_uint *sleep_loc;
2197#if KMP_OS_WINDOWS
2198 DWORD exit_val;
2199#endif
2200 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2201 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2202 __kmp_gtid_from_thread( thread ) ) );
2203 continue;
2204 }
2205#if KMP_OS_WINDOWS
2206 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2207 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2208 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2209 __kmp_unref_task_team( thread->th.th_task_team, thread );
2210 }
2211 continue;
2212 }
2213#endif
2214
2215 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2216
2217 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2218 __kmp_gtid_from_thread( thread ) ) );
2219
2220 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2221 // If the thread is sleeping, awaken it.
2222 if ( ( sleep_loc = (volatile kmp_uint *) TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2223 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2224 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2225 __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
2226 }
2227 }
2228 }
2229 if (done) {
2230 break;
2231 }
2232
2233 // If we are oversubscribed,
2234 // or have waited a bit (and library mode is throughput), yield.
2235 // Pause is in the following code.
2236 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2237 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2238 }
2239
2240
2241}
2242
2243
2244//------------------------------------------------------------------------------
2245// __kmp_task_team_setup: Create a task_team for the current team, but use
2246// an already created, unused one if it already exists.
2247// This may be called by any thread, but only for teams with # threads >1.
2248
2249void
2250__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
2251{
2252 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2253
2254 if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
2255 // Allocate a new task team, which will be propagated to
2256 // all of the worker threads after the barrier. As they
2257        // spin in the barrier release phase, they will continue
2258 // to use the previous task team struct, until they receive
2259 // the signal to stop checking for tasks (they can't safely
2260 // reference the kmp_team_t struct, which could be reallocated
2261 // by the master thread).
2262 team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
2263 KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
2264 "task_team %p for team %d\n",
2265 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
2266 ((team != NULL) ? team->t.t_id : -1)) );
2267 }
2268 else {
2269 // All threads have reported in, and no tasks were spawned
2270 // for this release->gather region. Leave the old task
2271 // team struct in place for the upcoming region. No task
2272 // teams are formed for serialized teams.
2273 }
2274 if ( team->t.t_task_team != NULL ) {
2275 // Toggle the state flag so that we can tell which side of
2276 // the barrier we are on.
2277 team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
2278 }
2279}
2280
2281
2282//------------------------------------------------------------------------------
2283// __kmp_task_team_sync: Propagation of task team data from team to threads
2284// which happens just after the release phase of a team barrier. This may be
2285// called by any thread, but only for teams with # threads > 1.
2286
2287void
2288__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2289{
2290 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2291
2292 // On the rare chance that this thread never saw that the task
2293 // team was no longer active, then unref/deallocate it now.
2294 if ( this_thr->th.th_task_team != NULL ) {
2295 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2296 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2297 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2298 } else {
2299 //
2300 // We are re-using a task team that was never enabled.
2301 //
2302 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
2303 }
2304 }
2305
2306 //
2307 // It is now safe to propagate the task team pointer from the
2308 // team struct to the current thread.
2309 //
2310 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
2311 if ( this_thr->th.th_task_team != NULL ) {
2312 //
2313 // Toggle the th_task_state field, instead of reading it from
2314 // the task team. Reading the tt_state field at this point
2315 // causes a 30% regression on EPCC parallel - toggling it
2316 // is much cheaper.
2317 //
2318 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2319 KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
2320 }
2321 KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2322 __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2323 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2324}
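
//
// Illustrative sketch (not compiled): the one-bit "generation" parity used by
// __kmp_task_team_setup and __kmp_task_team_sync above.  The master writes
// the flipped value into the task team before the release phase, and every
// thread flips its private copy after the release phase, so the two stay
// equal without the workers having to read the shared tt_state field.  Names
// below are hypothetical.
//
#if 0
static int ex_team_state;           // stands in for task_team->tt.tt_state
static int ex_thread_state[ 64 ];   // stands in for each thread's th_task_state

static void ex_setup( int master_tid )          // master, before release
{
    ex_team_state = 1 - ex_thread_state[ master_tid ];
}

static void ex_sync( int tid )                  // every thread, after release
{
    ex_thread_state[ tid ] = 1 - ex_thread_state[ tid ];
    // Invariant (the debug assert above): ex_thread_state[ tid ] == ex_team_state
}
#endif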
2325
2326
2327//------------------------------------------------------------------------------
2328// __kmp_task_team_wait: Master thread waits for outstanding tasks after
2329// the barrier gather phase. Only called by master thread if #threads
2330// in team > 1 !
2331
2332void
2333__kmp_task_team_wait( kmp_info_t *this_thr,
2334 kmp_team_t *team
2335 USE_ITT_BUILD_ARG(void * itt_sync_obj)
2336 )
2337{
2338 kmp_task_team_t *task_team = team->t.t_task_team;
2339
2340 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2341 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2342
2343 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
2344 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2345 __kmp_gtid_from_thread( this_thr ), task_team ) );
2346 //
2347 // All worker threads might have dropped through to the
2348 // release phase, but could still be executing tasks.
2349 // Wait here for all tasks to complete. To avoid memory
2350 // contention, only the master thread checks for the
2351 // termination condition.
2352 //
2353 __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
2354 USE_ITT_BUILD_ARG(itt_sync_obj)
2355 );
2356
2357 //
2358 // Kill the old task team, so that the worker threads will
2359 // stop referencing it while spinning. They will
2360 // deallocate it when the reference count reaches zero.
2361 // The master thread is not included in the ref count.
2362 //
2363 KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2364 __kmp_gtid_from_thread( this_thr ), task_team ) );
2365 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2366 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2367 KMP_MB();
2368
2369 TCW_PTR(this_thr->th.th_task_team, NULL);
2370 team->t.t_task_team = NULL;
2371 }
2372}
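
//
// Illustrative sketch (not compiled): the count that the master waits on
// above.  tt_unfinished_threads starts at the team size; each thread
// decrements it once when it has run out of tasks during its final spin (see
// __kmp_execute_tasks), and the master's wait completes when the count
// reaches zero.  The busy-wait and names below are hypothetical
// simplifications of __kmp_wait_sleep.
//
#if 0
static volatile int ex_unfinished_threads;

static void ex_final_spin_done( void )          // from a thread's final spin
{
    __sync_fetch_and_sub( &ex_unfinished_threads, 1 );   // atomic decrement
}

static void ex_master_wait( void )              // from the barrier gather
{
    while ( ex_unfinished_threads != 0 ) {
        /* execute tasks, yield, or sleep here */
    }
}
#endif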
2373
2374
2375//------------------------------------------------------------------------------
2376// __kmp_tasking_barrier:
2377// Internal function to execute all tasks prior to a regular barrier or a
2378// join barrier. It is a full barrier itself, which unfortunately turns
2379// regular barriers into double barriers and join barriers into 1 1/2
2380// barriers.
2381// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2382
2383void
2384__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2385{
2386 volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
2387 int flag = FALSE;
2388 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2389
2390#if USE_ITT_BUILD
2391 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2392#endif /* USE_ITT_BUILD */
2393 while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag
2394 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2395#if USE_ITT_BUILD
2396 // TODO: What about itt_sync_obj??
2397 KMP_FSYNC_SPIN_PREPARE( spin );
2398#endif /* USE_ITT_BUILD */
2399
2400 if( TCR_4(__kmp_global.g.g_done) ) {
2401 if( __kmp_global.g.g_abort )
2402 __kmp_abort_thread( );
2403 break;
2404 }
2405 KMP_YIELD( TRUE ); // GH: We always yield here
2406 }
2407#if USE_ITT_BUILD
2408 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2409#endif /* USE_ITT_BUILD */
2410}
2411
2412#endif // OMP_30_ENABLED
2413