/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            // free the block, if not the first; look up the owning thread from gtid
            __kmp_thread_free( __kmp_threads[ gtid ], stack_block );
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
             (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
              __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top  = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block  -> sb_prev = stack_block;
            new_block  -> sb_next = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
// __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread other than the calling thread can add tasks to this deque
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
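
// Illustration of the tail-index wrap performed above (a sketch, assuming the
// deque capacity reported by TASK_DEQUE_SIZE is a power of two so that
// TASK_DEQUE_MASK is capacity-1; the concrete capacity 256 is only an example):
//
//     tail = 255, capacity = 256, mask = 0xFF
//     new tail = ( 255 + 1 ) & 0xFF == 0     // wraps back to the start of the array
//
// The head index is masked the same way by the pop/steal paths, so head and tail
// chase each other around the circular buffer while td_deque_ntasks tracks how
// many entries are currently live.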


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // The current task of the thread becomes the parent of the just-created implicit tasks of the new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* let OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}
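
// Flag lifecycle for an explicit task, as enforced by the routine above and by
// __kmp_task_finish below (a summary of the assertions in this file, not extra
// state):
//
//     allocation:   started=0 executing=0 complete=0 freed=0
//     task start:   started=1 executing=1          (the suspending task gets executing=0)
//     task finish:  complete=1, then executing=0 only after dependences are released
//     deallocation: freed=1 once the allocated-children count reaches zero
//
// Untied tasks may pass through start/finish more than once, which is why the
// started/executing assertions above are relaxed for TASK_UNTIED.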


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if tasking serialized
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
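
// Outline of the reference-count walk above (derived from the code in this file:
// td_allocated_child_tasks starts at 1 for the task itself in __kmp_task_alloc
// and is incremented once per allocated explicit child):
//
//     finishing task -> atomically decrement its own count
//     count == 0     -> free the task, move to its parent, decrement there
//     repeat until an ancestor still has live children, is an implicit task,
//     or the team/tasking is serialized
//
// The "- 1" after KMP_TEST_THEN_DEC32 converts the fetch-and-decrement result
// (the old value) into the new value of the counter.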

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to check the counter so that the task structure is not freed prematurely
        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
        KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
                       gtid, counter, taskdata ) );
        if ( counter > 0 ) {
            // untied task is not done, to be continued possibly by other thread, do not free it now
            if (resumed_task == NULL) {
                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
            }
            thread->th.th_current_task = resumed_task;  // restore current_task
            resumed_task->td_flags.executing = 1;  // resume previous task
            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
                          gtid, taskdata, resumed_task) );
            return;
        }
    }

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag will be reset to 1 again by this same function.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the task's destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released,
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task. This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref:  reference to source location of parallel region
// this_thr:  thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task   = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started   = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete  = 0;
    task->td_flags.freed     = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;  // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a multiple of val, where val is a power of two.
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val
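
// Worked example of the rounding above (sizes chosen only for illustration):
//
//     __kmp_round_up_to_val( 13, 8 ):  13 & 7 != 0  ->  13 & ~7 == 8  ->  8 + 8 == 16
//     __kmp_round_up_to_val( 16, 8 ):  16 & 7 == 0  ->  returned unchanged
//
// The overflow guard leaves the truncated value in place only in the degenerate
// case where adding val would wrap past KMP_SIZE_T_MAX.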


//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;      // AC: Always start with 0 part id

    taskdata->td_task_id      = KMP_GEN_TASK_ID();
    taskdata->td_team         = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent       = parent_task;
    taskdata->td_level        = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_untied_count = 0;
    taskdata->td_ident        = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
    taskdata->td_task_team         = thread->th.th_task_team;
    taskdata->td_size_alloc        = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started     = 0;
    taskdata->td_flags.executing   = 0;
    taskdata->td_flags.complete    = 0;
    taskdata->td_flags.freed       = 0;

    taskdata->td_flags.native      = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}
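
// Layout of the single allocation made above (a sketch based on the offset
// arithmetic in __kmp_task_alloc; field widths not to scale):
//
//     +----------------+----------------------------+---------+-----------+
//     | kmp_taskdata_t | kmp_task_t + private vars  | padding | shareds   |
//     +----------------+----------------------------+---------+-----------+
//     ^ taskdata        ^ task = KMP_TASKDATA_TO_TASK(taskdata)
//                                                             ^ task->shareds
//                                                   (pointer-aligned via
//                                                    __kmp_round_up_to_val)
//
// KMP_TASK_TO_TASKDATA() walks back from the kmp_task_t to this header, which is
// how the __kmpc_* entry points recover the runtime's bookkeeping from the
// compiler-visible task pointer.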


kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}
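
// Illustrative (not compiler-generated) use of this entry point, assuming the
// caller already holds a valid gtid, an ident_t 'loc', and a task entry routine
// 'my_task_entry' with the kmp_routine_entry_t signature; the flag value 1 is
// assumed to select a tied task via the tiedness bit of kmp_tasking_flags_t:
//
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, 1 /* tied */,
//                                            sizeof(kmp_task_t) /* + privates */,
//                                            2 * sizeof(void *) /* shareds */,
//                                            &my_task_entry );
//     // fill in t->shareds, then queue t, e.g. via __kmpc_omp_task_parts() below,
//     // or run it immediately through the if0 begin/complete pair above.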

//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_uint64 cur_time;
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if(__kmp_forkjoin_frames_mode == 3) {
        // Get the current time stamp to measure task execution time to correct barrier imbalance time
        cur_time = __itt_get_timestamp();
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
    __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
#if KMP_STATS_ENABLED
        KMP_COUNT_BLOCK(TASK_executed);
        switch(KMP_GET_THREAD_STATE()) {
         case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
         case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
         case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
         case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
         case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
         default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
        }
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're about to run this task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
          ompt_callbacks.ompt_callback(ompt_event_task_switch)(
            current_task->ompt_task_info.task_id,
            taskdata->ompt_task_info.task_id);
        }
#endif

#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
        KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're returning to the callee task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
          ompt_callbacks.ompt_callback(ompt_event_task_switch)(
            taskdata->ompt_task_info.task_id,
            current_task->ompt_task_info.task_id);
        }
#endif

#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED


#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_finish( gtid, task, current_task );

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - correct arrive time after the task finished
    if(__kmp_forkjoin_frames_mode == 3) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        if(this_thr->th.th_bar_arrive_time) {
            this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
        }
    }
#endif
    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}
1277
1278//-----------------------------------------------------------------------
1279// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1280//
1281// loc_ref: location of original task pragma (ignored)
1282// gtid: Global Thread ID of encountering thread
1283// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1284// Returns:
                                          1285 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
                                          1286 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1287
1288kmp_int32
1289__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1290{
1291 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1292
1293 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1294 gtid, loc_ref, new_taskdata ) );
1295
1296 /* Should we execute the new task or queue it? For now, let's just always try to
1297 queue it. If the queue fills up, then we'll execute it. */
1298
1299 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1300 { // Execute this task immediately
1301 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1302 new_taskdata->td_flags.task_serial = 1;
1303 __kmp_invoke_task( gtid, new_task, current_task );
1304 }
1305
                                          1306     KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                                          1307                   "loc=%p task=%p\n", gtid, loc_ref,
1308 new_taskdata ) );
1309
1310 return TASK_CURRENT_NOT_QUEUED;
1311}
1312
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001313//---------------------------------------------------------------------
1314// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1315// gtid: Global Thread ID of encountering thread
1316// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
                                          1317 // serialize_immediate: if TRUE and the task is executed immediately, its execution is serialized
1318// returns:
1319//
                                          1320 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
                                          1321 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1322kmp_int32
1323__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1324{
1325 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1326
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001327#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001328 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001329 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1330 __builtin_frame_address(0);
1331 }
1332#endif
1333
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001334 /* Should we execute the new task or queue it? For now, let's just always try to
1335 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001336#if OMP_41_ENABLED
1337 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1338#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001339 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001340#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001341 { // Execute this task immediately
1342 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1343 if ( serialize_immediate )
1344 new_taskdata -> td_flags.task_serial = 1;
1345 __kmp_invoke_task( gtid, new_task, current_task );
1346 }
1347
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001348#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001349 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001350 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1351 }
1352#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001353
1354 return TASK_CURRENT_NOT_QUEUED;
1355}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001356
1357//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001358// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1359// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001360// loc_ref: location of original task pragma (ignored)
1361// gtid: Global Thread ID of encountering thread
1362// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1363// returns:
1364//
                                          1365 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
                                          1366 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1367
1368kmp_int32
1369__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1370{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001371 kmp_int32 res;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001372 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001373
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001374#if KMP_DEBUG
1375 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1376#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001377 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1378 gtid, loc_ref, new_taskdata ) );
1379
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001380 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001381
1382 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1383 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001384 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001385}
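// Editor's sketch (illustrative only, not part of this file; the helper name is
// hypothetical): with a compiler targeting this runtime, each "#pragma omp task"
// below is outlined into a thunk and handed to __kmpc_omp_task(); if the
// encountering thread's deque cannot take the task, the push fails and the task
// is invoked immediately and serialized, which is the fallback path coded above.

static void example_spawn_tasks( int *out, int n )
{
    #pragma omp parallel
    #pragma omp single
    for ( int i = 0; i < n; ++i ) {
        #pragma omp task firstprivate(i) shared(out)  // becomes a kmp_task_t thunk + __kmpc_omp_task()
        out[i] = i * i;
    }  // the implicit barriers that follow guarantee all of the tasks have completed
}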
1386
Jim Cownie5e8470a2013-09-27 10:38:44 +00001387//-------------------------------------------------------------------------------------
1388// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1389
1390kmp_int32
1391__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1392{
1393 kmp_taskdata_t * taskdata;
1394 kmp_info_t * thread;
1395 int thread_finished = FALSE;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001396 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001397
Jonathan Peyton54127982015-11-04 21:37:48 +00001398 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001399
1400 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1401 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1402
1403 thread = __kmp_threads[ gtid ];
1404 taskdata = thread -> th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001405
1406#if OMPT_SUPPORT && OMPT_TRACE
1407 ompt_task_id_t my_task_id;
1408 ompt_parallel_id_t my_parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001409
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001410 if (ompt_enabled) {
1411 kmp_team_t *team = thread->th.th_team;
1412 my_task_id = taskdata->ompt_task_info.task_id;
1413 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001414
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001415 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001416 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1417 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1418 my_parallel_id, my_task_id);
1419 }
1420 }
1421#endif
1422
Jim Cownie5e8470a2013-09-27 10:38:44 +00001423#if USE_ITT_BUILD
1424 // Note: These values are used by ITT events as well.
1425#endif /* USE_ITT_BUILD */
1426 taskdata->td_taskwait_counter += 1;
1427 taskdata->td_taskwait_ident = loc_ref;
1428 taskdata->td_taskwait_thread = gtid + 1;
1429
1430#if USE_ITT_BUILD
1431 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1432 if ( itt_sync_obj != NULL )
1433 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1434#endif /* USE_ITT_BUILD */
1435
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001436#if OMP_41_ENABLED
Jonathan Peyton61118492016-05-20 19:03:38 +00001437 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001438#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001439 if ( ! taskdata->td_flags.team_serial )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001440#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001441 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001442 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001443 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001444 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001445 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1446 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001447 }
1448 }
1449#if USE_ITT_BUILD
1450 if ( itt_sync_obj != NULL )
1451 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1452#endif /* USE_ITT_BUILD */
1453
1454 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1455 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001456
1457#if OMPT_SUPPORT && OMPT_TRACE
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001458 if (ompt_enabled) {
1459 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1460 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001461 my_parallel_id, my_task_id);
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001462 }
1463 taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001464 }
1465#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001466 }
1467
1468 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1469 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1470
1471 return TASK_CURRENT_NOT_QUEUED;
1472}
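// Editor's sketch (standalone, hypothetical helper name; assumed to be called
// from inside a parallel region): the wait above spins on
// td_incomplete_child_tasks, so a taskwait covers only the *children* of the
// current task, not all descendants -- a grandchild may still be running when
// it returns.

static void example_taskwait_children_only( volatile int *grandchild_done )
{
    #pragma omp task                      // child: the taskwait below waits for this task
    {
        #pragma omp task                  // grandchild: the taskwait does NOT wait for it
        *grandchild_done = 1;
    }
    #pragma omp taskwait                  // lowered to __kmpc_omp_taskwait()
    // *grandchild_done may still be 0 here; only a taskgroup also waits for descendants
}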
1473
1474
1475//-------------------------------------------------
1476// __kmpc_omp_taskyield: switch to a different task
1477
1478kmp_int32
1479__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1480{
1481 kmp_taskdata_t * taskdata;
1482 kmp_info_t * thread;
1483 int thread_finished = FALSE;
1484
Jonathan Peyton45be4502015-08-11 21:36:41 +00001485 KMP_COUNT_BLOCK(OMP_TASKYIELD);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001486 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001487
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1489 gtid, loc_ref, end_part) );
1490
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001491 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1493
1494 thread = __kmp_threads[ gtid ];
1495 taskdata = thread -> th.th_current_task;
1496 // Should we model this as a task wait or not?
1497#if USE_ITT_BUILD
1498 // Note: These values are used by ITT events as well.
1499#endif /* USE_ITT_BUILD */
1500 taskdata->td_taskwait_counter += 1;
1501 taskdata->td_taskwait_ident = loc_ref;
1502 taskdata->td_taskwait_thread = gtid + 1;
1503
1504#if USE_ITT_BUILD
1505 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1506 if ( itt_sync_obj != NULL )
1507 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1508#endif /* USE_ITT_BUILD */
1509 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001510 kmp_task_team_t * task_team = thread->th.th_task_team;
1511 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001512 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001513 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1514 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1515 }
1516 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001517 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001518#if USE_ITT_BUILD
1519 if ( itt_sync_obj != NULL )
1520 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1521#endif /* USE_ITT_BUILD */
1522
1523 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1524 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1525 }
1526
1527 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1528 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1529
1530 return TASK_CURRENT_NOT_QUEUED;
1531}
1532
1533
1534#if OMP_40_ENABLED
1535//-------------------------------------------------------------------------------------
1536// __kmpc_taskgroup: Start a new taskgroup
1537
1538void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001539__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001540{
1541 kmp_info_t * thread = __kmp_threads[ gtid ];
1542 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1543 kmp_taskgroup_t * tg_new =
1544 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1545 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1546 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001547 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001548 tg_new->parent = taskdata->td_taskgroup;
1549 taskdata->td_taskgroup = tg_new;
1550}
1551
1552
1553//-------------------------------------------------------------------------------------
1554// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1555// and its descendants are complete
1556
1557void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001558__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001559{
1560 kmp_info_t * thread = __kmp_threads[ gtid ];
1561 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1562 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1563 int thread_finished = FALSE;
1564
1565 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1566 KMP_DEBUG_ASSERT( taskgroup != NULL );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001567 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001568
1569 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1570#if USE_ITT_BUILD
1571 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1572 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1573 if ( itt_sync_obj != NULL )
1574 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1575#endif /* USE_ITT_BUILD */
1576
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001577#if OMP_41_ENABLED
Jonathan Peyton61118492016-05-20 19:03:38 +00001578 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001579#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001580 if ( ! taskdata->td_flags.team_serial )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001581#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001582 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001583 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001584 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001585 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1586 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587 }
1588 }
1589
1590#if USE_ITT_BUILD
1591 if ( itt_sync_obj != NULL )
1592 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1593#endif /* USE_ITT_BUILD */
1594 }
1595 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1596
1597 // Restore parent taskgroup for the current task
1598 taskdata->td_taskgroup = taskgroup->parent;
1599 __kmp_thread_free( thread, taskgroup );
1600
1601 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1602}
1603#endif
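// Editor's sketch (standalone, hypothetical helper name): a taskgroup region is
// bracketed by the two entry points above, __kmpc_taskgroup() and
// __kmpc_end_taskgroup(), and every descendant task created inside the region
// registers with the active kmp_taskgroup_t, so the end-of-region wait covers
// grandchildren as well -- unlike the taskwait sketch earlier.

static void example_taskgroup_waits_for_descendants( volatile int *grandchild_done )
{
    #pragma omp taskgroup                 // __kmpc_taskgroup( &loc, gtid ) ... __kmpc_end_taskgroup( &loc, gtid )
    {
        #pragma omp task
        {
            #pragma omp task              // the grandchild increments the enclosing taskgroup's count
            *grandchild_done = 1;
        }
    }
    // here *grandchild_done is guaranteed to be 1: the taskgroup waited for all descendants
}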
1604
1605
1606//------------------------------------------------------
1607// __kmp_remove_my_task: remove a task from my own deque
1608
1609static kmp_task_t *
1610__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1611 kmp_int32 is_constrained )
1612{
1613 kmp_task_t * task;
1614 kmp_taskdata_t * taskdata;
1615 kmp_thread_data_t *thread_data;
1616 kmp_uint32 tail;
1617
1618 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1619 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1620
1621 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1622
1623 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1624 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1625 thread_data->td.td_deque_tail) );
1626
1627 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1628 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1629 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1630 thread_data->td.td_deque_tail) );
1631 return NULL;
1632 }
1633
1634 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1635
1636 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1637 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1638 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1639 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1640 thread_data->td.td_deque_tail) );
1641 return NULL;
1642 }
1643
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001644 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645 taskdata = thread_data -> td.td_deque[ tail ];
1646
1647 if (is_constrained) {
                                          1648         // we need to check if the candidate obeys the task scheduling constraint:
                                          1649         // only a child of the current task can be scheduled
1650 kmp_taskdata_t * current = thread->th.th_current_task;
1651 kmp_int32 level = current->td_level;
1652 kmp_taskdata_t * parent = taskdata->td_parent;
1653 while ( parent != current && parent->td_level > level ) {
1654 parent = parent->td_parent; // check generation up to the level of the current task
1655 KMP_DEBUG_ASSERT(parent != NULL);
1656 }
1657 if ( parent != current ) {
                                          1658             // If the tail task is not a child, then no other children can appear in the deque.
1659 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1660 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1661 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1662 thread_data->td.td_deque_tail) );
1663 return NULL;
1664 }
1665 }
1666
1667 thread_data -> td.td_deque_tail = tail;
1668 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1669
1670 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1671
1672 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1673 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1674 thread_data->td.td_deque_tail) );
1675
1676 task = KMP_TASKDATA_TO_TASK( taskdata );
1677 return task;
1678}
1679
1680
1681//-----------------------------------------------------------
1682// __kmp_steal_task: remove a task from another thread's deque
1683// Assume that calling thread has already checked existence of
1684// task_team thread_data before calling this routine.
1685
1686static kmp_task_t *
1687__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1688 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1689 kmp_int32 is_constrained )
1690{
1691 kmp_task_t * task;
1692 kmp_taskdata_t * taskdata;
1693 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001694 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695
1696 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1697
1698 threads_data = task_team -> tt.tt_threads_data;
1699 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1700
1701 victim_tid = victim->th.th_info.ds.ds_tid;
1702 victim_td = & threads_data[ victim_tid ];
1703
1704 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1705 "head=%u tail=%u\n",
1706 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1707 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1708
1709 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1710 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1711 {
1712 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1713 "ntasks=%d head=%u tail=%u\n",
1714 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1715 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1716 return NULL;
1717 }
1718
1719 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1720
1721 // Check again after we acquire the lock
1722 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1723 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1724 {
1725 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1726 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1727 "ntasks=%d head=%u tail=%u\n",
1728 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1729 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1730 return NULL;
1731 }
1732
1733 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1734
1735 if ( !is_constrained ) {
1736 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001737 KMP_ASSERT(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001738 // Bump head pointer and Wrap.
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001739 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK(victim_td->td);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001740 } else {
                                          1741         // While we have postponed tasks, let's steal from the tail of the deque (smaller tasks)
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001742 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(victim_td->td); // Wrap index.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001743 taskdata = victim_td -> td.td_deque[ tail ];
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001744 KMP_ASSERT(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745         // we need to check if the candidate obeys the task scheduling constraint:
                                          1746         // only a child of the current task can be scheduled
1747 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1748 kmp_int32 level = current->td_level;
1749 kmp_taskdata_t * parent = taskdata->td_parent;
1750 while ( parent != current && parent->td_level > level ) {
1751 parent = parent->td_parent; // check generation up to the level of the current task
1752 KMP_DEBUG_ASSERT(parent != NULL);
1753 }
1754 if ( parent != current ) {
                                          1755             // If the tail task is not a child, then no other children can appear in the deque (?).
1756 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1757 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1758 "ntasks=%d head=%u tail=%u\n",
1759 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1760 task_team, victim_td->td.td_deque_ntasks,
1761 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1762 return NULL;
1763 }
1764 victim_td -> td.td_deque_tail = tail;
1765 }
1766 if (*thread_finished) {
1767 // We need to un-mark this victim as a finished victim. This must be done before
1768 // releasing the lock, or else other threads (starting with the master victim)
1769 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001770 kmp_uint32 count;
1771
1772 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001773
1774 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1775 gtid, count + 1, task_team) );
1776
1777 *thread_finished = FALSE;
1778 }
1779 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1780
1781 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1782
Jonathan Peyton45be4502015-08-11 21:36:41 +00001783 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001784 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001785 "ntasks=%d head=%u tail=%u\n",
1786 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1787 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1788 victim_td->td.td_deque_tail) );
1789
1790 task = KMP_TASKDATA_TO_TASK( taskdata );
1791 return task;
1792}
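// Editor's sketch of the constraint check used by both __kmp_remove_my_task and
// __kmp_steal_task above (mock type only -- example_taskdata is hypothetical,
// not a runtime structure): the candidate's ancestor chain is walked via
// td_parent, but no deeper than the level of the thief's current task, and the
// candidate is accepted only if that walk ends at the current task itself.

struct example_taskdata {
    example_taskdata *td_parent;
    int               td_level;
};

static bool example_obeys_scheduling_constraint( example_taskdata *candidate,
                                                 example_taskdata *current )
{
    example_taskdata *parent = candidate->td_parent;
    while ( parent != NULL && parent != current && parent->td_level > current->td_level ) {
        parent = parent->td_parent;   // climb one generation toward the root
    }
    return parent == current;         // accepted only if the walk reached the current task
}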
1793
1794
1795//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001796// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001797 // is satisfied (return true) or there are none left (return false).
1798// final_spin is TRUE if this is the spin at the release barrier.
1799// thread_finished indicates whether the thread is finished executing all
1800// the tasks it has on its deque, and is at the release barrier.
1801// spinner is the location on which to spin.
1802// spinner == NULL means only execute a single task and return.
1803// checker is the value to check to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001804template <class C>
Jonathan Peyton61118492016-05-20 19:03:38 +00001805static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001806 int *thread_finished
1807 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808{
1809 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 kmp_thread_data_t * threads_data;
1811 kmp_task_t * task;
1812 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1813 volatile kmp_uint32 * unfinished_threads;
1814 kmp_int32 nthreads, last_stolen, k, tid;
1815
1816 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1817 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1818
1819 task_team = thread -> th.th_task_team;
Jonathan Peyton54127982015-11-04 21:37:48 +00001820 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001821
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001822 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001823 gtid, final_spin, *thread_finished) );
1824
1825 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1826 KMP_DEBUG_ASSERT( threads_data != NULL );
1827
1828 nthreads = task_team -> tt.tt_nproc;
1829 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001830#if OMP_41_ENABLED
1831 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1832#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001833 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001834#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001835 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1836
1837 // Choose tasks from our own work queue.
1838 start:
1839 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1840#if USE_ITT_BUILD && USE_ITT_NOTIFY
1841 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1842 if ( itt_sync_obj == NULL ) {
1843 // we are at fork barrier where we could not get the object reliably
1844 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1845 }
1846 __kmp_itt_task_starting( itt_sync_obj );
1847 }
1848#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1849 __kmp_invoke_task( gtid, task, current_task );
1850#if USE_ITT_BUILD
1851 if ( itt_sync_obj != NULL )
1852 __kmp_itt_task_finished( itt_sync_obj );
1853#endif /* USE_ITT_BUILD */
1854
1855 // If this thread is only partway through the barrier and the condition
1856 // is met, then return now, so that the barrier gather/release pattern can proceed.
1857 // If this thread is in the last spin loop in the barrier, waiting to be
                                          1858         // released, we know that the termination condition will not be satisfied,
1859 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001860 if (flag == NULL || (!final_spin && flag->done_check())) {
1861 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001862 return TRUE;
1863 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001864 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001865 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1866 }
1867
1868 // This thread's work queue is empty. If we are in the final spin loop
1869 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001870#if OMP_41_ENABLED
1871 // The work queue may be empty but there might be proxy tasks still executing
Jonathan Peyton61118492016-05-20 19:03:38 +00001872 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001873#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001874 if (final_spin)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001875#endif
1876 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001877 // First, decrement the #unfinished threads, if that has not already
1878 // been done. This decrement might be to the spin location, and
1879 // result in the termination condition being satisfied.
1880 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001881 kmp_uint32 count;
1882
1883 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001884 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001885 gtid, count, task_team) );
1886 *thread_finished = TRUE;
1887 }
1888
1889 // It is now unsafe to reference thread->th.th_team !!!
1890 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1891 // thread to pass through the barrier, where it might reset each thread's
1892 // th.th_team field for the next parallel region.
1893 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001894 if (flag != NULL && flag->done_check()) {
1895 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001896 return TRUE;
1897 }
1898 }
1899
Jonathan Peyton54127982015-11-04 21:37:48 +00001900 if (thread->th.th_task_team == NULL) return FALSE;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001901#if OMP_41_ENABLED
1902 // check if there are other threads to steal from, otherwise go back
1903 if ( nthreads == 1 )
1904 goto start;
1905#endif
1906
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907 // Try to steal from the last place I stole from successfully.
1908 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1909 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1910
1911 if (last_stolen != -1) {
1912 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1913
1914 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1915 thread_finished, is_constrained )) != NULL)
1916 {
1917#if USE_ITT_BUILD && USE_ITT_NOTIFY
1918 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1919 if ( itt_sync_obj == NULL ) {
1920 // we are at fork barrier where we could not get the object reliably
1921 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1922 }
1923 __kmp_itt_task_starting( itt_sync_obj );
1924 }
1925#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1926 __kmp_invoke_task( gtid, task, current_task );
1927#if USE_ITT_BUILD
1928 if ( itt_sync_obj != NULL )
1929 __kmp_itt_task_finished( itt_sync_obj );
1930#endif /* USE_ITT_BUILD */
1931
1932 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001933 if (flag == NULL || (!final_spin && flag->done_check())) {
1934 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001935 gtid) );
1936 return TRUE;
1937 }
1938
Jonathan Peyton54127982015-11-04 21:37:48 +00001939 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001940 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1941 // If the execution of the stolen task resulted in more tasks being
1942 // placed on our run queue, then restart the whole process.
1943 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001944 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001945 gtid) );
1946 goto start;
1947 }
1948 }
1949
1950 // Don't give priority to stealing from this thread anymore.
1951 threads_data[ tid ].td.td_deque_last_stolen = -1;
1952
                                          1953         // The victim's work queue is empty.  If we are in the final spin loop
1954 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001955#if OMP_41_ENABLED
1956 // The work queue may be empty but there might be proxy tasks still executing
Jonathan Peyton61118492016-05-20 19:03:38 +00001957 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001958#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001959 if (final_spin)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001960#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001961 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001962 // First, decrement the #unfinished threads, if that has not already
1963 // been done. This decrement might be to the spin location, and
1964 // result in the termination condition being satisfied.
1965 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001966 kmp_uint32 count;
1967
1968 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001969 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970 "task_team=%p\n", gtid, count, task_team) );
1971 *thread_finished = TRUE;
1972 }
1973
1974 // If __kmp_tasking_mode != tskm_immediate_exec
1975 // then it is now unsafe to reference thread->th.th_team !!!
1976 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1977 // thread to pass through the barrier, where it might reset each thread's
1978 // th.th_team field for the next parallel region.
1979 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001980 if (flag != NULL && flag->done_check()) {
1981 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982 gtid) );
1983 return TRUE;
1984 }
1985 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001986 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987 }
1988
1989 // Find a different thread to steal work from. Pick a random thread.
1990 // My initial plan was to cycle through all the threads, and only return
1991 // if we tried to steal from every thread, and failed. Arch says that's
1992 // not such a great idea.
1993 // GEH - need yield code in this loop for throughput library mode?
1994 new_victim:
1995 k = __kmp_get_random( thread ) % (nthreads - 1);
1996 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1997 ++k; // Adjusts random distribution to exclude self
1998 }
1999 {
2000 kmp_info_t *other_thread = threads_data[k].td.td_thr;
2001 int first;
2002
2003 // There is a slight chance that __kmp_enable_tasking() did not wake up
2004 // all threads waiting at the barrier. If this thread is sleeping, then
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002005 // wake it up. Since we were going to pay the cache miss penalty
2006 // for referencing another thread's kmp_info_t struct anyway, the check
Jim Cownie5e8470a2013-09-27 10:38:44 +00002007 // shouldn't cost too much performance at this point.
2008 // In extra barrier mode, tasks do not sleep at the separate tasking
2009 // barrier, so this isn't a problem.
2010 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2011 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2012 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
2013 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002014 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015             // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00002016 // There is a slight possibility that it resumes, steals a task from
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002017 // another thread, which spawns more tasks, all in the time that it takes
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018 // this thread to check => don't write an assertion that the victim's
2019 // queue is empty. Try stealing from a different thread.
2020 goto new_victim;
2021 }
2022
2023 // Now try to steal work from the selected thread
2024 first = TRUE;
2025 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
2026 thread_finished, is_constrained )) != NULL)
2027 {
2028#if USE_ITT_BUILD && USE_ITT_NOTIFY
2029 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
2030 if ( itt_sync_obj == NULL ) {
2031 // we are at fork barrier where we could not get the object reliably
2032 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
2033 }
2034 __kmp_itt_task_starting( itt_sync_obj );
2035 }
2036#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2037 __kmp_invoke_task( gtid, task, current_task );
2038#if USE_ITT_BUILD
2039 if ( itt_sync_obj != NULL )
2040 __kmp_itt_task_finished( itt_sync_obj );
2041#endif /* USE_ITT_BUILD */
2042
2043 // Try stealing from this victim again, in the future.
2044 if (first) {
2045 threads_data[ tid ].td.td_deque_last_stolen = k;
2046 first = FALSE;
2047 }
2048
2049 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002050 if (flag == NULL || (!final_spin && flag->done_check())) {
2051 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002052 gtid) );
2053 return TRUE;
2054 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002055 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
2057
2058 // If the execution of the stolen task resulted in more tasks being
2059 // placed on our run queue, then restart the whole process.
2060 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002061 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002062 gtid) );
2063 goto start;
2064 }
2065 }
2066
                                          2067         // The victim's work queue is empty.  If we are in the final spin loop
2068 // of the barrier, check and see if the termination condition is satisfied.
2069 // Going on and finding a new victim to steal from is expensive, as it
2070 // involves a lot of cache misses, so we definitely want to re-check the
2071 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002072#if OMP_41_ENABLED
2073 // The work queue may be empty but there might be proxy tasks still executing
Jonathan Peyton61118492016-05-20 19:03:38 +00002074 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002075#else
Jonathan Peyton61118492016-05-20 19:03:38 +00002076 if (final_spin)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002077#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002078 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002079 // First, decrement the #unfinished threads, if that has not already
2080 // been done. This decrement might be to the spin location, and
2081 // result in the termination condition being satisfied.
2082 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002083 kmp_uint32 count;
2084
2085 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002086 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00002087 "task_team=%p\n",
2088 gtid, count, task_team) );
2089 *thread_finished = TRUE;
2090 }
2091
2092 // If __kmp_tasking_mode != tskm_immediate_exec,
2093 // then it is now unsafe to reference thread->th.th_team !!!
2094 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
2095 // thread to pass through the barrier, where it might reset each thread's
2096 // th.th_team field for the next parallel region.
2097 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002098 if (flag != NULL && flag->done_check()) {
2099 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002100 return TRUE;
2101 }
2102 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002103 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104 }
2105
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002106 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107 return FALSE;
2108}
2109
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002110int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
2111 int *thread_finished
2112 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2113{
2114 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2115 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2116}
2117
2118int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2119 int *thread_finished
2120 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2121{
2122 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2123 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2124}
2125
2126int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2127 int *thread_finished
2128 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2129{
2130 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2131 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2132}
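// Editor's sketch of the wrapper pattern above (all names illustrative): the
// stealing loop is written once as a template over a flag class that supplies
// done_check(), and thin typed entry points select the instantiation, which is
// how __kmp_execute_tasks_32/_64/_oncore reuse __kmp_execute_tasks_template.

struct example_flag_32 {
    volatile kmp_uint32 *loc;
    kmp_uint32           checker;
    bool done_check() const { return *loc == checker; }
};

template <class C>
static inline int example_wait_template( C *flag )
{
    // ... pop / steal / invoke tasks between the checks ...
    return ( flag != NULL && flag->done_check() ) ? TRUE : FALSE;
}

static int example_wait_32( example_flag_32 *flag )
{
    return example_wait_template( flag );   // instantiates the template for 32-bit flags
}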
2133
2134
Jim Cownie5e8470a2013-09-27 10:38:44 +00002135
2136//-----------------------------------------------------------------------------
2137// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2138// next barrier so they can assist in executing enqueued tasks.
2139// First thread in allocates the task team atomically.
2140
2141static void
2142__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2143{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002144 kmp_thread_data_t *threads_data;
2145 int nthreads, i, is_init_thread;
2146
2147 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2148 __kmp_gtid_from_thread( this_thr ) ) );
2149
2150 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002151 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002152
2153 nthreads = task_team->tt.tt_nproc;
2154 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002155 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002156
2157 // Allocate or increase the size of threads_data if necessary
2158 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2159
2160 if (!is_init_thread) {
2161 // Some other thread already set up the array.
2162 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2163 __kmp_gtid_from_thread( this_thr ) ) );
2164 return;
2165 }
2166 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2167 KMP_DEBUG_ASSERT( threads_data != NULL );
2168
2169 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2170 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2171 {
2172 // Release any threads sleeping at the barrier, so that they can steal
2173 // tasks and execute them. In extra barrier mode, tasks do not sleep
2174 // at the separate tasking barrier, so this isn't a problem.
2175 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002176 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002177 kmp_info_t *thread = threads_data[i].td.td_thr;
2178
2179 if (i == this_thr->th.th_info.ds.ds_tid) {
2180 continue;
2181 }
                                          2182             // Since we haven't acquired the thread's suspend mutex at this
2183 // point, there is a small window where a thread might be putting
2184 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002185 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002186             // to see if other threads are sleeping (using the same random
2187 // mechanism that is used for task stealing) and awakens them if
2188 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002189 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002190 {
2191 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2192 __kmp_gtid_from_thread( this_thr ),
2193 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002194 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002195 }
2196 else {
2197 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2198 __kmp_gtid_from_thread( this_thr ),
2199 __kmp_gtid_from_thread( thread ) ) );
2200 }
2201 }
2202 }
2203
2204 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2205 __kmp_gtid_from_thread( this_thr ) ) );
2206}
2207
2208
2209/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002210/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002211 * Utility routines for "task teams".  A task team (kmp_task_team_t) is kind of
2212 * like a shadow of the kmp_team_t data struct, with a different lifetime.
                                          2213 * After a child thread checks into a barrier and calls __kmp_release() from
2214 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2215 * longer assume that the kmp_team_t structure is intact (at any moment, the
2216 * master thread may exit the barrier code and free the team data structure,
2217 * and return the threads to the thread pool).
2218 *
                                          2219 * This does not work with the tasking code, as the thread is still
                                          2220 * expected to participate in the execution of any tasks that may have been
                                          2221 * spawned by a member of the team, and the thread still needs access
                                          2222 * to each thread in the team, so that it can steal work from it.
2223 *
2224 * Enter the existence of the kmp_task_team_t struct. It employs a reference
                                          2225 * counting mechanism, and is allocated by the master thread before calling
                                          2226 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2227 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2228 * of the kmp_task_team_t structs for consecutive barriers can overlap
2229 * (and will, unless the master thread is the last thread to exit the barrier
2230 * release phase, which is not typical).
2231 *
2232 * The existence of such a struct is useful outside the context of tasking,
2233 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2234 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2235 * libraries.
2236 *
2237 * We currently use the existence of the threads array as an indicator that
2238 * tasks were spawned since the last barrier. If the structure is to be
2239 * useful outside the context of tasking, then this will have to change, but
                                          2240 * not setting the field minimizes the performance impact of tasking on
2241 * barriers, when no explicit tasks were spawned (pushed, actually).
2242 */
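// Editor's sketch (standalone, hypothetical type, C++11 atomics rather than the
// runtime's own primitives) of the last-thread-out-frees idea described in the
// comment above: each thread holds a reference until it leaves the barrier
// release phase, so the structure can outlive the kmp_team_t it shadows.

#include <atomic>

struct example_task_team {
    std::atomic<int> ref_count;
    // ... per-thread deques, unfinished-thread counter, ...
};

static void example_release_reference( example_task_team *tt )
{
    if ( tt->ref_count.fetch_sub( 1, std::memory_order_acq_rel ) == 1 ) {
        delete tt;   // the last thread to exit the release phase frees the structure
    }
}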
2243
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002244
Jim Cownie5e8470a2013-09-27 10:38:44 +00002245static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2246// Lock for task team data structures
2247static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2248
2249
2250//------------------------------------------------------------------------------
2251// __kmp_alloc_task_deque:
                                          2252 //  Allocates a task deque for a particular thread, and initializes the necessary
2253// data structures relating to the deque. This only happens once per thread
2254// per task team since task teams are recycled.
2255// No lock is needed during allocation since each thread allocates its own
2256// deque.
2257
2258static void
2259__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2260{
2261 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2262 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2263
2264 // Initialize last stolen task field to "none"
2265 thread_data -> td.td_deque_last_stolen = -1;
2266
2267 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2268 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2269 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2270
2271 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002272 __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002273 // Allocate space for task deque, and zero the deque
2274 // Cannot use __kmp_thread_calloc() because threads not around for
2275 // kmp_reap_task_team( ).
2276 thread_data -> td.td_deque = (kmp_taskdata_t **)
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002277 __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2278 thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002279}
2280
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002281//------------------------------------------------------------------------------
2282// __kmp_realloc_task_deque:
2283// Re-allocates a task deque for a particular thread, copies the content from the old deque
2284// and adjusts the necessary data structures relating to the deque.
                                          2285 //  This operation must be done with the deque_lock held
2286
2287static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2288{
2289 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2290 kmp_int32 new_size = 2 * size;
2291
2292 KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
2293 __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
2294
2295 kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
2296
2297 int i,j;
2298 for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
2299 new_deque[j] = thread_data->td.td_deque[i];
2300
2301 __kmp_free(thread_data->td.td_deque);
2302
2303 thread_data -> td.td_deque_head = 0;
2304 thread_data -> td.td_deque_tail = size;
2305 thread_data -> td.td_deque = new_deque;
2306 thread_data -> td.td_deque_size = new_size;
2307}
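// Editor's sketch (hypothetical example_deque type) of the index arithmetic the
// deque code above relies on: with a power-of-two capacity, "& (size - 1)" wraps
// the head (steal/FIFO end) and the tail (push-pop/LIFO end), and growth unwraps
// the ring into a fresh array so that the head restarts at 0, mirroring
// __kmp_realloc_task_deque.

struct example_deque {
    void     **slots;
    kmp_uint32 size;     // always a power of two
    kmp_uint32 head;     // steal end
    kmp_uint32 tail;     // push/pop end
    kmp_uint32 ntasks;   // number of live entries
};

static void example_deque_grow( example_deque *d )
{
    kmp_uint32 new_size  = 2 * d->size;
    void     **new_slots = (void **) __kmp_allocate( new_size * sizeof(void *) );
    kmp_uint32 i = d->head;
    for ( kmp_uint32 j = 0; j < d->ntasks; ++j, i = ( i + 1 ) & ( d->size - 1 ) )
        new_slots[j] = d->slots[i];        // copy the live entries in head-to-tail order
    __kmp_free( d->slots );
    d->slots = new_slots;
    d->head  = 0;
    d->tail  = d->ntasks;                  // the next push lands right after the copied entries
    d->size  = new_size;
}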
Jim Cownie5e8470a2013-09-27 10:38:44 +00002308
2309//------------------------------------------------------------------------------
2310// __kmp_free_task_deque:
2311// Deallocates a task deque for a particular thread.
2312// Happens at library deallocation so don't need to reset all thread data fields.
2313
2314static void
2315__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2316{
2317 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2318
2319 if ( thread_data -> td.td_deque != NULL ) {
2320 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2321 __kmp_free( thread_data -> td.td_deque );
2322 thread_data -> td.td_deque = NULL;
2323 }
2324 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2325
2326#ifdef BUILD_TIED_TASK_STACK
2327 // GEH: Figure out what to do here for td_susp_tied_tasks
2328 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2329 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2330 }
2331#endif // BUILD_TIED_TASK_STACK
2332}
2333
2334
2335//------------------------------------------------------------------------------
2336// __kmp_realloc_task_threads_data:
2337// Allocates a threads_data array for a task team, either by allocating an initial
2338// array or enlarging an existing array. Only the first thread to get the lock
                                          2339 // allocs or enlarges the array and re-initializes the array elements.
2340// That thread returns "TRUE", the rest return "FALSE".
2341// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2342// The current size is given by task_team -> tt.tt_max_threads.
2343
2344static int
2345__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2346{
2347 kmp_thread_data_t ** threads_data_p;
2348 kmp_int32 nthreads, maxthreads;
2349 int is_init_thread = FALSE;
2350
2351 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2352 // Already reallocated and initialized.
2353 return FALSE;
2354 }
2355
2356 threads_data_p = & task_team -> tt.tt_threads_data;
2357 nthreads = task_team -> tt.tt_nproc;
2358 maxthreads = task_team -> tt.tt_max_threads;
2359
2360 // All threads must lock when they encounter the first task of the implicit task
                                          2361     // region to make sure threads_data fields are (re)initialized before being used.
2362 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2363
2364 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2365 // first thread to enable tasking
2366 kmp_team_t *team = thread -> th.th_team;
2367 int i;
2368
2369 is_init_thread = TRUE;
2370 if ( maxthreads < nthreads ) {
2371
2372 if ( *threads_data_p != NULL ) {
2373 kmp_thread_data_t *old_data = *threads_data_p;
2374 kmp_thread_data_t *new_data = NULL;
2375
2376 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2377 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2378 __kmp_gtid_from_thread( thread ), task_team,
2379 nthreads, maxthreads ) );
2380 // Reallocate threads_data to have more elements than current array
2381 // Cannot use __kmp_thread_realloc() because threads not around for
2382 // kmp_reap_task_team( ). Note all new array entries are initialized
2383 // to zero by __kmp_allocate().
2384 new_data = (kmp_thread_data_t *)
2385 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2386 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002387 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002388 (void *) old_data,
                                              2389                   maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002390
2391#ifdef BUILD_TIED_TASK_STACK
2392 // GEH: Figure out if this is the right thing to do
2393 for (i = maxthreads; i < nthreads; i++) {
2394 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2395 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2396 }
2397#endif // BUILD_TIED_TASK_STACK
2398 // Install the new data and free the old data
2399 (*threads_data_p) = new_data;
2400 __kmp_free( old_data );
2401 }
2402 else {
2403 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2404 "threads data for task_team %p, size = %d\n",
2405 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2406 // Make the initial allocate for threads_data array, and zero entries
2407 // Cannot use __kmp_thread_calloc() because threads not around for
2408 // kmp_reap_task_team( ).
2409 *threads_data_p = (kmp_thread_data_t *)
2410 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2411#ifdef BUILD_TIED_TASK_STACK
2412 // GEH: Figure out if this is the right thing to do
2413 for (i = 0; i < nthreads; i++) {
2414 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2415 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2416 }
2417#endif // BUILD_TIED_TASK_STACK
2418 }
2419 task_team -> tt.tt_max_threads = nthreads;
2420 }
2421 else {
2422 // If array has (more than) enough elements, go ahead and use it
2423 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2424 }
2425
2426 // initialize threads_data pointers back to thread_info structures
2427 for (i = 0; i < nthreads; i++) {
2428 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2429 thread_data -> td.td_thr = team -> t.t_threads[i];
2430
2431 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2432 // The last stolen field survives across teams / barrier, and the number
2433 // of threads may have changed. It's possible (likely?) that a new
2434 // parallel region will exhibit the same behavior as the previous region.
2435 thread_data -> td.td_deque_last_stolen = -1;
2436 }
2437 }
2438
2439 KMP_MB();
2440 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2441 }
2442
2443 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2444 return is_init_thread;
2445}
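// Illustration of the protocol above: every thread racing here first does the
// unlocked tt_found_tasks check, so once the flag is published the call is a
// cheap early-out.  Under tt_threads_lock exactly one "winner" sizes (or
// grows) the array and wires each td_thr back to the team's kmp_info_t
// entries; only after the KMP_MB() does it publish tt_found_tasks with
// TCW_SYNC_4, so later threads return FALSE and never observe a
// half-initialized threads_data array.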
2446
2447
2448//------------------------------------------------------------------------------
2449// __kmp_free_task_threads_data:
2450// Deallocates a threads_data array for a task team, including any attached
2451// tasking deques. Only occurs at library shutdown.
2452
2453static void
2454__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2455{
2456 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2457 if ( task_team -> tt.tt_threads_data != NULL ) {
2458 int i;
2459 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2460 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2461 }
2462 __kmp_free( task_team -> tt.tt_threads_data );
2463 task_team -> tt.tt_threads_data = NULL;
2464 }
2465 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2466}
2467
2468
2469//------------------------------------------------------------------------------
2470// __kmp_allocate_task_team:
2471// Allocates a task team associated with a specific team, taking it from
2472// the global task team free list if possible. Also initializes data structures.
2473
2474static kmp_task_team_t *
2475__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2476{
2477 kmp_task_team_t *task_team = NULL;
2478 int nthreads;
2479
2480 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2481 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2482
2483 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2484 // Take a task team from the task team pool
2485 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2486 if (__kmp_free_task_teams != NULL) {
2487 task_team = __kmp_free_task_teams;
2488 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2489 task_team -> tt.tt_next = NULL;
2490 }
2491 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2492 }
2493
2494 if (task_team == NULL) {
2495 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2496 "task team for team %p\n",
2497 __kmp_gtid_from_thread( thread ), team ) );
2498 // Allocate a new task team if one is not available.
2499 // Cannot use __kmp_thread_malloc() because threads not around for
2500 // kmp_reap_task_team( ).
2501 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2502 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2503 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2504 //task_team -> tt.tt_max_threads = 0;
2505 //task_team -> tt.tt_next = NULL;
2506 }
2507
2508 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002509#if OMP_41_ENABLED
2510 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2511#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002512 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2513
Jim Cownie5e8470a2013-09-27 10:38:44 +00002514 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2515 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002516
Jonathan Peyton54127982015-11-04 21:37:48 +00002517 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2518 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002519 return task_team;
2520}
2521
2522
2523//------------------------------------------------------------------------------
2524// __kmp_free_task_team:
2525// Frees the task team associated with a specific thread, and adds it
2526// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002527
Jonathan Peyton54127982015-11-04 21:37:48 +00002528void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002529__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2530{
2531 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2532 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2533
Jim Cownie5e8470a2013-09-27 10:38:44 +00002534 // Put task team back on free list
2535 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2536
2537 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2538 task_team -> tt.tt_next = __kmp_free_task_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002539 TCW_PTR(__kmp_free_task_teams, task_team);
2540
2541 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2542}
2543
2544
2545//------------------------------------------------------------------------------
2546// __kmp_reap_task_teams:
2547// Free all the task teams on the task team free list.
2548// Should only be done during library shutdown.
2549// Cannot do anything that needs a thread structure or gtid since they are already gone.
2550
2551void
2552__kmp_reap_task_teams( void )
2553{
2554 kmp_task_team_t *task_team;
2555
2556 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2557 // Free all task_teams on the free list
2558 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2559 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2560 __kmp_free_task_teams = task_team -> tt.tt_next;
2561 task_team -> tt.tt_next = NULL;
2562
2563 // Free threads_data if necessary
2564 if ( task_team -> tt.tt_threads_data != NULL ) {
2565 __kmp_free_task_threads_data( task_team );
2566 }
2567 __kmp_free( task_team );
2568 }
2569 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2570 }
2571}
2572
Jim Cownie5e8470a2013-09-27 10:38:44 +00002573//------------------------------------------------------------------------------
2574// __kmp_wait_to_unref_task_teams:
2575// Some threads could still be in the fork barrier release code, possibly
2576// trying to steal tasks. Wait for each thread to unreference its task team.
2577//
2578void
2579__kmp_wait_to_unref_task_teams(void)
2580{
2581 kmp_info_t *thread;
2582 kmp_uint32 spins;
2583 int done;
2584
2585 KMP_INIT_YIELD( spins );
2586
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587 for (;;) {
2588 done = TRUE;
2589
                                              2590        // TODO: GEH - this may be wrong because some sync would be necessary
2591 // in case threads are added to the pool during the traversal.
2592 // Need to verify that lock for thread pool is held when calling
2593 // this routine.
2594 for (thread = (kmp_info_t *)__kmp_thread_pool;
2595 thread != NULL;
2596 thread = thread->th.th_next_pool)
2597 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598#if KMP_OS_WINDOWS
2599 DWORD exit_val;
2600#endif
2601 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2602 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2603 __kmp_gtid_from_thread( thread ) ) );
2604 continue;
2605 }
2606#if KMP_OS_WINDOWS
2607 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2608 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002609 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002610 continue;
2611 }
2612#endif
2613
2614 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2615
2616 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2617 __kmp_gtid_from_thread( thread ) ) );
2618
2619 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002620 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002622 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2624 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002625 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002626 }
2627 }
2628 }
2629 if (done) {
2630 break;
2631 }
2632
2633 // If we are oversubscribed,
2634 // or have waited a bit (and library mode is throughput), yield.
2635 // Pause is in the following code.
2636 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2637 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2638 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002639}
2640
2641
2642//------------------------------------------------------------------------------
2643// __kmp_task_team_setup: Create a task_team for the current team, but use
2644// an already created, unused one if it already exists.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002645void
Jonathan Peyton54127982015-11-04 21:37:48 +00002646__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002647{
2648 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2649
Jonathan Peyton54127982015-11-04 21:37:48 +00002650 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2651 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
Jonathan Peyton61118492016-05-20 19:03:38 +00002652 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002653 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002654 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002655 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002656 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002657 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002658
Jonathan Peyton61118492016-05-20 19:03:38 +00002659 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
Jonathan Peyton54127982015-11-04 21:37:48 +00002660 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2661 // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
Jonathan Peyton61118492016-05-20 19:03:38 +00002662 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
Jonathan Peyton54127982015-11-04 21:37:48 +00002663 // serialized teams.
Jonathan Peytone1dad192015-11-30 20:05:13 +00002664 if (team->t.t_nproc > 1) {
2665 int other_team = 1 - this_thr->th.th_task_state;
2666 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2667 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2668 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2669 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2670 ((team != NULL) ? team->t.t_id : -1), other_team ));
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002671 }
Jonathan Peytone1dad192015-11-30 20:05:13 +00002672 else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2673 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2674 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2675 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2676 TCW_4(task_team->tt.tt_found_tasks, FALSE);
2677#if OMP_41_ENABLED
2678 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2679#endif
2680 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2681 TCW_4(task_team->tt.tt_active, TRUE );
2682 }
2683 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2684 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2685 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2686 ((team != NULL) ? team->t.t_id : -1), other_team ));
2687 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002688 }
2689}
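// For orientation (restating the scheme above, not new behavior): each team
// keeps two task_team slots, t.t_task_team[0] and t.t_task_team[1], and every
// thread's th_task_state (0 or 1) picks the slot it currently works from.
// While workers are still draining tasks from the current slot during this
// barrier, the master prepares the other slot here; after the release phase
// __kmp_task_team_sync flips th_task_state, so the next region starts on a
// clean task team without waiting for the old one to quiesce.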
2690
2691
2692//------------------------------------------------------------------------------
2693// __kmp_task_team_sync: Propagation of task team data from team to threads
2694// which happens just after the release phase of a team barrier. This may be
2695// called by any thread, but only for teams with # threads > 1.
2696
2697void
2698__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2699{
2700 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2701
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002702 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002703 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002704 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2705 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jonathan Peyton54127982015-11-04 21:37:48 +00002706 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002707 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2708 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002709}
2710
2711
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002712//--------------------------------------------------------------------------------------------
2713// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
Jonathan Peyton54127982015-11-04 21:37:48 +00002714// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2715// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2716// optionally as the last argument. When wait is zero, master thread does not wait for
2717// unfinished_threads to reach 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002718void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002719__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002720 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jonathan Peyton54127982015-11-04 21:37:48 +00002721 , int wait)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002722{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002723 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002724
2725 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2726 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2727
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002728 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002729 if (wait) {
2730 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2731 __kmp_gtid_from_thread(this_thr), task_team));
2732 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2733 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2734 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2735 flag.wait(this_thr, TRUE
2736 USE_ITT_BUILD_ARG(itt_sync_obj));
2737 }
2738 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2739 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2740 "setting active to false, setting local and team's pointer to NULL\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002741 __kmp_gtid_from_thread(this_thr), task_team));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002742#if OMP_41_ENABLED
2743 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2744 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2745#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002746 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002747#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002748 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2749 KMP_MB();
2750
2751 TCW_PTR(this_thr->th.th_task_team, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002752 }
2753}
2754
2755
2756//------------------------------------------------------------------------------
2757// __kmp_tasking_barrier:
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002758// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002759// Internal function to execute all tasks prior to a regular barrier or a
2760// join barrier. It is a full barrier itself, which unfortunately turns
2761// regular barriers into double barriers and join barriers into 1 1/2
2762// barriers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002763void
2764__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2765{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002766 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002767 int flag = FALSE;
2768 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2769
2770#if USE_ITT_BUILD
2771 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2772#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002773 kmp_flag_32 spin_flag(spin, 0U);
2774 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2775 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002776#if USE_ITT_BUILD
2777 // TODO: What about itt_sync_obj??
2778 KMP_FSYNC_SPIN_PREPARE( spin );
2779#endif /* USE_ITT_BUILD */
2780
2781 if( TCR_4(__kmp_global.g.g_done) ) {
2782 if( __kmp_global.g.g_abort )
2783 __kmp_abort_thread( );
2784 break;
2785 }
2786 KMP_YIELD( TRUE ); // GH: We always yield here
2787 }
2788#if USE_ITT_BUILD
2789 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2790#endif /* USE_ITT_BUILD */
2791}
2792
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002793
2794#if OMP_41_ENABLED
2795
2796/* __kmp_give_task puts a task into a given thread queue if:
Jonathan Peytonff684e42016-02-11 22:58:29 +00002797 - the queue for that thread was created
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002798 - there's space in that queue
2799
2800 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2801 */
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002802static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002803{
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002804 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002805 kmp_task_team_t * task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002806
2807 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2808
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002809 // If task_team is NULL something went really bad...
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002810 KMP_DEBUG_ASSERT( task_team != NULL );
2811
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002812 bool result = false;
2813 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2814
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002815 if (thread_data -> td.td_deque == NULL ) {
2816 // There's no queue in this thread, go find another one
2817 // We're guaranteed that at least one thread has a queue
2818 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2819 return result;
2820 }
2821
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002822 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002823 {
2824 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002825
2826 // if this deque is bigger than the pass ratio give a chance to another thread
2827 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
2828
2829 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2830 __kmp_realloc_task_deque(thread,thread_data);
2831
2832 } else {
2833
2834 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2835
2836 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2837 {
2838 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2839
2840 // if this deque is bigger than the pass ratio give a chance to another thread
2841 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
2842 goto release_and_exit;
2843
2844 __kmp_realloc_task_deque(thread,thread_data);
2845 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002846 }
2847
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002848 // lock is held here, and there is space in the deque
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002849
2850 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2851 // Wrap index.
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002852 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002853 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2854
2855 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002856 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002857
2858release_and_exit:
2859 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2860
2861 return result;
2862}
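// Example of the 'pass' heuristic above (illustrative numbers): with the
// default INITIAL_TASK_DEQUE_SIZE of 256, a thread whose deque already grew
// to 1024 entries has a size ratio of 4.  As long as the caller's pass value
// is 1, 2 or 4 we return false and let it try another thread; only after the
// caller (__kmpc_proxy_task_completed_ooo) has swept the whole team enough
// times for pass to reach 8 do we grow this deque again instead of skipping it.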
2863
2864
                                              2865/* The finish of a proxy task is divided into two pieces:
                                              2866    - the top half is the one that can be done from a thread outside the team
                                              2867    - the bottom half must be run from a thread within the team
                                              2868
                                              2869   In order to run the bottom half, the task gets queued back into one of the threads of the team.
                                              2870   Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
                                              2871   So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
                                              2872    - things that can be run before queuing the bottom half
                                              2873    - things that must be run after queuing the bottom half
                                              2874
                                              2875   This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
                                              2876   we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2877*/
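/* Sequence for the out-of-team path (__kmpc_proxy_task_completed_ooo), shown
   here only as a reading aid:
     1. __kmp_first_top_half_finish_proxy: mark the task complete and add an
        imaginary incomplete child so the bottom half cannot free it yet.
     2. __kmp_give_task: queue the task into some team thread's deque so that
        thread will eventually run the bottom half.
     3. __kmp_second_top_half_finish_proxy: decrement the parent's
        td_incomplete_child_tasks and remove the imaginary child.
     4. __kmp_bottom_half_finish_proxy (on the team thread): spin until the
        imaginary child is gone, then release dependences and free the task.
 */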
2878
2879static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2880{
2881 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2882 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2883 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2884 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2885
2886 taskdata -> td_flags.complete = 1; // mark the task as completed
2887
2888 if ( taskdata->td_taskgroup )
2889 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2890
                                              2891    // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
Paul Osmialowski52bef532016-05-07 00:00:00 +00002892 TCI_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002893}
2894
2895static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2896{
2897 kmp_int32 children = 0;
2898
2899 // Predecrement simulated by "- 1" calculation
2900 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2901 KMP_DEBUG_ASSERT( children >= 0 );
2902
                                              2903    // Remove the imaginary child
Paul Osmialowski52bef532016-05-07 00:00:00 +00002904 TCD_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002905}
2906
2907static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2908{
2909 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2910 kmp_info_t * thread = __kmp_threads[ gtid ];
2911
2912 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2913 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2914
2915 // We need to wait to make sure the top half is finished
2916 // Spinning here should be ok as this should happen quickly
2917 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2918
2919 __kmp_release_deps(gtid,taskdata);
2920 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2921}
2922
2923/*!
2924@ingroup TASKING
2925@param gtid Global Thread ID of encountering thread
                                              2926@param ptask Task whose execution is completed
2927
                                              2928Execute the completion of a proxy task from a thread that is part of the team. Run both top halves and the bottom half directly.
2929*/
2930void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2931{
2932 KMP_DEBUG_ASSERT( ptask != NULL );
2933 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2934 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2935
2936 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2937
2938 __kmp_first_top_half_finish_proxy(taskdata);
2939 __kmp_second_top_half_finish_proxy(taskdata);
2940 __kmp_bottom_half_finish_proxy(gtid,ptask);
2941
2942 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2943}
2944
2945/*!
2946@ingroup TASKING
                                              2947@param ptask Task whose execution is completed
2948
                                              2949Execute the completion of a proxy task from a thread that may not belong to the team.
2950*/
2951void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2952{
2953 KMP_DEBUG_ASSERT( ptask != NULL );
2954 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2955
2956 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2957
2958 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2959
2960 __kmp_first_top_half_finish_proxy(taskdata);
2961
Jonathan Peytonff684e42016-02-11 22:58:29 +00002962    // Enqueue the task so that a thread within the corresponding team completes the bottom half
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002963 kmp_team_t * team = taskdata->td_team;
2964 kmp_int32 nthreads = team->t.t_nproc;
2965 kmp_info_t *thread;
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002966
2967 //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2968 kmp_int32 start_k = 0;
2969 kmp_int32 pass = 1;
2970 kmp_int32 k = start_k;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002971
2972 do {
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002973 //For now we're just linearly trying to find a thread
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002974 thread = team->t.t_threads[k];
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002975 k = (k+1) % nthreads;
2976
2977 // we did a full pass through all the threads
2978 if ( k == start_k ) pass = pass << 1;
2979
2980 } while ( !__kmp_give_task( thread, k, ptask, pass ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002981
2982 __kmp_second_top_half_finish_proxy(taskdata);
2983
2984 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2985}
2986
Jonathan Peyton283a2152016-03-02 22:47:51 +00002987//---------------------------------------------------------------------------------
2988// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2989//
2990// thread: allocating thread
2991// task_src: pointer to source task to be duplicated
2992// returns: a pointer to the allocated kmp_task_t structure (task).
2993kmp_task_t *
2994__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2995{
2996 kmp_task_t *task;
2997 kmp_taskdata_t *taskdata;
2998 kmp_taskdata_t *taskdata_src;
2999 kmp_taskdata_t *parent_task = thread->th.th_current_task;
3000 size_t shareds_offset;
3001 size_t task_size;
3002
3003 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
3004 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
3005 KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
3006 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
3007 task_size = taskdata_src->td_size_alloc;
3008
3009 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
3010 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
3011 #if USE_FAST_MEMORY
3012 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
3013 #else
3014 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
3015 #endif /* USE_FAST_MEMORY */
3016 KMP_MEMCPY(taskdata, taskdata_src, task_size);
3017
3018 task = KMP_TASKDATA_TO_TASK(taskdata);
3019
3020 // Initialize new task (only specific fields not affected by memcpy)
3021 taskdata->td_task_id = KMP_GEN_TASK_ID();
                                              3022    if( task->shareds != NULL ) { // need to set up the shareds pointer
3023 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
3024 task->shareds = &((char*)taskdata)[shareds_offset];
3025 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
3026 }
3027 taskdata->td_alloc_thread = thread;
3028 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
3029
3030 // Only need to keep track of child task counts if team parallel and tasking not serialized
3031 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
3032 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
3033 if ( parent_task->td_taskgroup )
3034 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
3035 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
3036 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
3037 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
3038 }
3039
3040 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
3041 thread, taskdata, taskdata->td_parent) );
3042#if OMPT_SUPPORT
3043 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
3044#endif
3045 return task;
3046}
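// Illustration of the shareds fix-up above (offset made up): if the source
// task's shareds block began, say, 128 bytes past the start of its
// kmp_taskdata_t, the flat KMP_MEMCPY leaves the copy's task->shareds still
// pointing into the source allocation; recomputing shareds_offset and
// rebasing it onto the new taskdata makes the duplicate point at its own
// copied shareds instead.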
3047
                                              3048// Routine optionally generated by the compiler for setting the lastprivate flag
3049// and calling needed constructors for private/firstprivate objects
3050// (used to form taskloop tasks from pattern task)
3051typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
3052
3053//---------------------------------------------------------------------------------
3054// __kmp_taskloop_linear: Start tasks of the taskloop linearly
3055//
3056// loc Source location information
3057// gtid Global thread ID
3058// task Task with whole loop iteration range
3059// lb Pointer to loop lower bound
3060// ub Pointer to loop upper bound
3061// st Loop stride
3062// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3063// grainsize Schedule value if specified
3064// task_dup Tasks duplication routine
3065void
3066__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
3067 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3068 int sched, kmp_uint64 grainsize, void *task_dup )
3069{
3070 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
3071 kmp_uint64 tc;
3072 kmp_uint64 lower = *lb; // compiler provides global bounds here
3073 kmp_uint64 upper = *ub;
Samuel Antao11e4c532016-03-12 00:55:17 +00003074 kmp_uint64 i, num_tasks = 0, extras = 0;
Jonathan Peyton283a2152016-03-02 22:47:51 +00003075 kmp_info_t *thread = __kmp_threads[gtid];
3076 kmp_taskdata_t *current_task = thread->th.th_current_task;
3077 kmp_task_t *next_task;
3078 kmp_int32 lastpriv = 0;
3079 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
3080 size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
3081
3082 // compute trip count
3083 if ( st == 1 ) { // most common case
3084 tc = upper - lower + 1;
3085 } else if ( st < 0 ) {
3086 tc = (lower - upper) / (-st) + 1;
3087 } else { // st > 0
3088 tc = (upper - lower) / st + 1;
3089 }
3090 if(tc == 0) {
3091 // free the pattern task and exit
3092 __kmp_task_start( gtid, task, current_task );
3093 // do not execute anything for zero-trip loop
3094 __kmp_task_finish( gtid, task, current_task );
3095 return;
3096 }
3097
3098 // compute num_tasks/grainsize based on the input provided
3099 switch( sched ) {
3100 case 0: // no schedule clause specified, we can choose the default
                                              3101        // let's try to schedule (team_size*10) tasks; fall through to the num_tasks case below
3102 grainsize = thread->th.th_team_nproc * 10;
3103 case 2: // num_tasks provided
3104 if( grainsize > tc ) {
3105 num_tasks = tc; // too big num_tasks requested, adjust values
3106 grainsize = 1;
3107 extras = 0;
3108 } else {
3109 num_tasks = grainsize;
3110 grainsize = tc / num_tasks;
3111 extras = tc % num_tasks;
3112 }
3113 break;
3114 case 1: // grainsize provided
3115 if( grainsize > tc ) {
3116 num_tasks = 1; // too big grainsize requested, adjust values
3117 grainsize = tc;
3118 extras = 0;
3119 } else {
3120 num_tasks = tc / grainsize;
3121 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3122 extras = tc % num_tasks;
3123 }
3124 break;
3125 default:
3126 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3127 }
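    // Worked example (illustrative numbers): tc = 10 iterations with
    // grainsize(3), i.e. sched == 1, gives num_tasks = 10/3 = 3,
    // grainsize = 10/3 = 3 and extras = 10%3 = 1, so the generated tasks get
    // 4, 3 and 3 iterations; the asserts below check exactly this accounting
    // (10 == 3*3 + 1).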
3128 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3129 KMP_DEBUG_ASSERT(num_tasks > extras);
3130 KMP_DEBUG_ASSERT(num_tasks > 0);
3131
3132 // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3133 for( i = 0; i < num_tasks; ++i ) {
3134 kmp_uint64 chunk_minus_1;
3135 if( extras == 0 ) {
3136 chunk_minus_1 = grainsize - 1;
3137 } else {
3138 chunk_minus_1 = grainsize;
3139 --extras; // first extras iterations get bigger chunk (grainsize+1)
3140 }
3141 upper = lower + st * chunk_minus_1;
3142 if( i == num_tasks - 1 ) {
3143 // schedule the last task, set lastprivate flag
3144 lastpriv = 1;
3145#if KMP_DEBUG
3146 if( st == 1 )
3147 KMP_DEBUG_ASSERT(upper == *ub);
3148 else if( st > 0 )
3149 KMP_DEBUG_ASSERT(upper+st > *ub);
3150 else
3151 KMP_DEBUG_ASSERT(upper+st < *ub);
3152#endif
3153 }
3154 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3155 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3156 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3157 if( ptask_dup != NULL )
                                              3158            ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3159 __kmp_omp_task(gtid, next_task, true); // schedule new task
3160 lower = upper + st; // adjust lower bound for the next iteration
3161 }
3162 // free the pattern task and exit
3163 __kmp_task_start( gtid, task, current_task );
3164 // do not execute the pattern task, just do bookkeeping
3165 __kmp_task_finish( gtid, task, current_task );
3166}
3167
3168/*!
3169@ingroup TASKING
3170@param loc Source location information
3171@param gtid Global thread ID
3172@param task Task structure
3173@param if_val Value of the if clause
3174@param lb Pointer to loop lower bound
3175@param ub Pointer to loop upper bound
3176@param st Loop stride
3177@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3178@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3179@param grainsize Schedule value if specified
3180@param task_dup Tasks duplication routine
3181
3182Execute the taskloop construct.
3183*/
3184void
3185__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3186 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3187 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3188{
3189 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3190 KMP_DEBUG_ASSERT( task != NULL );
3191
3192 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3193 gtid, taskdata, *lb, *ub, st, grainsize, sched));
3194
                                              3195    // check the if-clause value first
3196 if( if_val == 0 ) { // if(0) specified, mark task as serial
3197 taskdata->td_flags.task_serial = 1;
3198 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3199 }
3200 if( nogroup == 0 ) {
3201 __kmpc_taskgroup( loc, gtid );
3202 }
3203
3204 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3205 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3206 }
3207
3208 if( nogroup == 0 ) {
3209 __kmpc_end_taskgroup( loc, gtid );
3210 }
3211 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3212}
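// For orientation only -- a rough sketch of how a compiler might lower
//
//     #pragma omp taskloop grainsize(100)
//     for (long i = 0; i < N; ++i) body(i);
//
// onto this entry point.  The flags, sizes and helper names below are
// illustrative, not the actual clang code generation:
//
//     kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, 1 /*tied*/,
//                                           sizeof_kmp_task_t, sizeof_shareds,
//                                           task_entry);
//     // store 0, N-1 and 1 into the task's private lb/ub/st fields, then:
//     __kmpc_taskloop(loc, gtid, t, /*if_val*/1, &t_lb, &t_ub, /*st*/1,
//                     /*nogroup*/0, /*sched*/1, /*grainsize*/100,
//                     /*task_dup*/NULL);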
3213
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003214#endif