/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_45_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

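// Note: the tied-task stack code below (__kmp_trace_task_stack through
// __kmp_pop_task_stack) is only compiled when BUILD_TIED_TASK_STACK is defined;
// it is a debugging aid that records suspended tied tasks in a chain of
// kmp_stack_block_t blocks hanging off each thread's kmp_thread_data_t.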
#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
    kmp_info_t *thread = __kmp_threads[ gtid ];  // calling thread; needed by __kmp_thread_free below

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

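    // ts_entries has just crossed a block boundary when the bits selected by
    // TASK_STACK_INDEX_MASK (assumed to be TASK_STACK_BLOCK_SIZE - 1) are all
    // zero, so ts_top must be advanced into the next kmp_stack_block_t.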
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
             (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
              __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top  = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block  -> sb_prev = stack_block;
            new_block  -> sb_next = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
// __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_45_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
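    // TASK_DEQUE_MASK is the deque size minus one (the masking assumes the deque
    // size is a power of two), so the "+ 1" above wraps tail back to 0 when it
    // reaches the end, e.g. ..., 254, 255, 0, 1, ... for a 256-entry deque.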
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
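// Note: a TASK_NOT_PUSHED return does not lose the task; the caller is expected
// to execute the task immediately instead of deferring it.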


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    // Proxy tasks must always be allowed to free their parents
    // because they can be run in background even in serial mode.
    kmp_int32 task_serial = taskdata->td_flags.task_serial && !taskdata->td_flags.proxy;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    kmp_int32 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );
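    // td_allocated_child_tasks counts the task itself plus any still-allocated
    // children; reaching zero here means nothing can still reference this
    // taskdata, so it is safe to free it and then try its ancestors.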

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( task_serial || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_task_team_t * task_team = thread->th.th_task_team; // might be NULL for serial teams...
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to check the counter so that the task structure is not freed prematurely
        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
        KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
                       gtid, counter, taskdata ) );
        if ( counter > 0 ) {
            // untied task is not done, to be continued possibly by other thread, do not free it now
            if (resumed_task == NULL) {
                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
            }
            thread->th.th_current_task = resumed_task; // restore current_task
            resumed_task->td_flags.executing = 1;  // resume previous task
            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
                          gtid, taskdata, resumed_task) );
            return;
        }
    }

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
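        // The parent's td_incomplete_child_tasks count is what taskwait and the
        // barrier/task-team machinery poll to decide when all of the parent's
        // child tasks have completed.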
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
#if OMP_45_ENABLED
    }
    // if we found proxy tasks there could exist a dependency chain
    // with the proxy task as origin
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) || (task_team && task_team->tt.tt_found_proxy_tasks) ) {
#endif
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
    // Otherwise, if a task is executed immediately from the release_deps code
    // the flag will be reset to 1 again by this same function
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else
#if OMP_45_ENABLED
            if ( !(task_team && task_team->tt.tt_found_proxy_tasks) )
#endif
        {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task. This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_45_ENABLED
    task->td_flags.proxy    = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
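    // Serialization flags as used throughout this file: tasking_ser means the
    // runtime is in immediate-exec tasking mode, team_serial means the enclosing
    // team is serialized, and task_serial means this particular task is executed
    // immediately rather than deferred.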

    task->td_flags.started     = 1;
    task->td_flags.executing   = 1;
    task->td_flags.complete    = 0;
    task->td_flags.freed       = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0; // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;           // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks  == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a power of two specified by val
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val
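// For example (val must be a power of two): __kmp_round_up_to_val( 17, 8 )
// returns 24, while __kmp_round_up_to_val( 16, 8 ) returns 16 unchanged.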


//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
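    // Resulting single-allocation layout (offsets rounded up to pointer alignment):
    //   [ kmp_taskdata_t | kmp_task_t + privates | padding | shareds ]
    //   ^ taskdata         ^ task                           ^ task->shareds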

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;      // AC: Always start with 0 part id

    taskdata->td_task_id      = KMP_GEN_TASK_ID();
    taskdata->td_team         = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent       = parent_task;
    taskdata->td_level        = parent_task->td_level + 1; // increment nesting level
    taskdata->td_untied_count = 0;
    taskdata->td_ident        = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_45_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
    taskdata->td_task_team         = thread->th.th_task_team;
    taskdata->td_size_alloc        = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started     = 0;
    taskdata->td_flags.executing   = 0;
    taskdata->td_flags.complete    = 0;
    taskdata->td_flags.freed       = 0;

    taskdata->td_flags.native      = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1; // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}


kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}

//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_uint64 cur_time;
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_45_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if(__kmp_forkjoin_frames_mode == 3) {
        // Get the current time stamp to measure task execution time to correct barrier imbalance time
        cur_time = __itt_get_timestamp();
    }
#endif

#if OMP_45_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }
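    // When discard is set, the task body below is skipped, but __kmp_task_finish
    // still runs afterwards so child/taskgroup counters stay consistent.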
1201
Jim Cownie5e8470a2013-09-27 10:38:44 +00001202 //
1203 // Invoke the task routine and pass in relevant data.
1204 // Thunks generated by gcc take a different argument list.
1205 //
Jim Cownie181b4bb2013-12-23 17:28:57 +00001206 if (!discard) {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001207#if KMP_STATS_ENABLED
Jonathan Peyton45be4502015-08-11 21:36:41 +00001208 KMP_COUNT_BLOCK(TASK_executed);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001209 switch(KMP_GET_THREAD_STATE()) {
1210 case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
1211 case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
1212 case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
1213 case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
1214 case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
1215 default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
1216 }
1217#endif // KMP_STATS_ENABLED
Jim Cownie181b4bb2013-12-23 17:28:57 +00001218#endif // OMP_40_ENABLED
Jonathan Peytonadee8c52015-11-11 17:49:50 +00001219
1220#if OMPT_SUPPORT && OMPT_TRACE
1221 /* let OMPT know that we're about to run this task */
1222 if (ompt_enabled &&
1223 ompt_callbacks.ompt_callback(ompt_event_task_switch))
1224 {
1225 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1226 current_task->ompt_task_info.task_id,
1227 taskdata->ompt_task_info.task_id);
1228 }
1229#endif
1230
Jim Cownie5e8470a2013-09-27 10:38:44 +00001231#ifdef KMP_GOMP_COMPAT
Jim Cownie181b4bb2013-12-23 17:28:57 +00001232 if (taskdata->td_flags.native) {
1233 ((void (*)(void *))(*(task->routine)))(task->shareds);
1234 }
1235 else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001236#endif /* KMP_GOMP_COMPAT */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001237 {
1238 (*(task->routine))(gtid, task);
1239 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001240 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonadee8c52015-11-11 17:49:50 +00001241
1242#if OMPT_SUPPORT && OMPT_TRACE
1243 /* let OMPT know that we're returning to the callee task */
1244 if (ompt_enabled &&
1245 ompt_callbacks.ompt_callback(ompt_event_task_switch))
1246 {
1247 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1248 taskdata->ompt_task_info.task_id,
1249 current_task->ompt_task_info.task_id);
1250 }
1251#endif
1252
Jim Cownie181b4bb2013-12-23 17:28:57 +00001253#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001254 }
Jim Cownie181b4bb2013-12-23 17:28:57 +00001255#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001256
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001257
1258#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001259 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001260 thread->th.ompt_thread_info = oldInfo;
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001261 taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001262 }
1263#endif
1264
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001265#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001266 // Proxy tasks are not handled by the runtime
1267 if ( taskdata->td_flags.proxy != TASK_PROXY )
1268#endif
1269 __kmp_task_finish( gtid, task, current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001270
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001271#if USE_ITT_BUILD && USE_ITT_NOTIFY
1272 // Barrier imbalance - correct arrive time after the task finished
1273 if(__kmp_forkjoin_frames_mode == 3) {
1274 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1275 if(this_thr->th.th_bar_arrive_time) {
1276 this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1277 }
1278 }
1279#endif
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001280 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001281 gtid, taskdata, current_task) );
1282 return;
1283}
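
// Illustrative note (an assumption, not taken from this file): the task->routine
// invoked above is the compiler-outlined task entry. Its expected shape is roughly
//     kmp_int32 task_entry( kmp_int32 gtid, kmp_task_t *task );
// i.e. it receives the executing thread's gtid plus the task thunk, and reads its
// captured data through task->shareds.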
1284
1285//-----------------------------------------------------------------------
1286// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1287//
1288// loc_ref: location of original task pragma (ignored)
1289// gtid: Global Thread ID of encountering thread
1290// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1291// Returns:
1292// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1293// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1294
1295kmp_int32
1296__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1297{
1298 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1299
1300 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1301 gtid, loc_ref, new_taskdata ) );
1302
1303 /* Should we execute the new task or queue it? For now, let's just always try to
1304 queue it. If the queue fills up, then we'll execute it. */
1305
1306 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1307 { // Execute this task immediately
1308 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1309 new_taskdata->td_flags.task_serial = 1;
1310 __kmp_invoke_task( gtid, new_task, current_task );
1311 }
1312
1313 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1314 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1315 new_taskdata ) );
1316
1317 return TASK_CURRENT_NOT_QUEUED;
1318}
1319
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001320//---------------------------------------------------------------------
1321// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1322// gtid: Global Thread ID of encountering thread
1323// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1324// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1325// returns:
1326//
1327// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1328// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1329kmp_int32
1330__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1331{
1332 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1333
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001334#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001335 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001336 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001337 __builtin_frame_address(1);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001338 }
1339#endif
1340
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001341 /* Should we execute the new task or queue it? For now, let's just always try to
1342 queue it. If the queue fills up, then we'll execute it. */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001343#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001344 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1345#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001346 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001347#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001348 { // Execute this task immediately
1349 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1350 if ( serialize_immediate )
1351 new_taskdata -> td_flags.task_serial = 1;
1352 __kmp_invoke_task( gtid, new_task, current_task );
1353 }
1354
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001355#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001356 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001357 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001358 }
1359#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001360
1361 return TASK_CURRENT_NOT_QUEUED;
1362}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001363
1364//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001365// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1366// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001367// loc_ref: location of original task pragma (ignored)
1368// gtid: Global Thread ID of encountering thread
1369// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1370// returns:
1371//
1372// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1373// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1374
1375kmp_int32
1376__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1377{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001378 kmp_int32 res;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001379 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001380
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001381#if KMP_DEBUG
1382 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1383#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001384 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1385 gtid, loc_ref, new_taskdata ) );
1386
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001387 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001388
1389 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1390 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001391 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001392}
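
// Usage sketch (assumed compiler lowering, shown for illustration only): a
// `#pragma omp task` construct is expected to allocate the thunk and then hand it
// to this entry point, along the lines of
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, flags,
//                                            sizeof_kmp_task_t, sizeof_shareds,
//                                            &task_entry );
//     /* copy firstprivate/shared data into t->shareds here */
//     __kmpc_omp_task( &loc, gtid, t );
// The allocator's exact signature is not defined in this file; the call above is a
// hedged sketch of the typical sequence.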
1393
Jim Cownie5e8470a2013-09-27 10:38:44 +00001394//-------------------------------------------------------------------------------------
1395// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1396
1397kmp_int32
1398__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1399{
1400 kmp_taskdata_t * taskdata;
1401 kmp_info_t * thread;
1402 int thread_finished = FALSE;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001403 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001404
Jonathan Peyton54127982015-11-04 21:37:48 +00001405 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406
1407 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1408 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1409
1410 thread = __kmp_threads[ gtid ];
1411 taskdata = thread -> th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001412
1413#if OMPT_SUPPORT && OMPT_TRACE
1414 ompt_task_id_t my_task_id;
1415 ompt_parallel_id_t my_parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001416
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001417 if (ompt_enabled) {
1418 kmp_team_t *team = thread->th.th_team;
1419 my_task_id = taskdata->ompt_task_info.task_id;
1420 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001421
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001422 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001423 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1424 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1425 my_parallel_id, my_task_id);
1426 }
1427 }
1428#endif
1429
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001430 // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001431#if USE_ITT_BUILD
1432 // Note: These values are used by ITT events as well.
1433#endif /* USE_ITT_BUILD */
1434 taskdata->td_taskwait_counter += 1;
1435 taskdata->td_taskwait_ident = loc_ref;
1436 taskdata->td_taskwait_thread = gtid + 1;
1437
1438#if USE_ITT_BUILD
1439 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1440 if ( itt_sync_obj != NULL )
1441 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1442#endif /* USE_ITT_BUILD */
1443
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001444#if OMP_45_ENABLED
Jonathan Peyton61118492016-05-20 19:03:38 +00001445 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001446#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001447 if ( ! taskdata->td_flags.team_serial )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001448#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001449 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001450 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001451 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001452 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001453 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1454 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001455 }
1456 }
1457#if USE_ITT_BUILD
1458 if ( itt_sync_obj != NULL )
1459 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1460#endif /* USE_ITT_BUILD */
1461
1462 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001463 // Debugger: The taskwait is completed. Location remains, but thread is negated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001464 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001465
1466#if OMPT_SUPPORT && OMPT_TRACE
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001467 if (ompt_enabled) {
1468 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1469 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001470 my_parallel_id, my_task_id);
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001471 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001472 taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001473 }
1474#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001475 }
1476
1477 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1478 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1479
1480 return TASK_CURRENT_NOT_QUEUED;
1481}
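
// Usage sketch (assumption): `#pragma omp taskwait` is expected to lower to a
// single call such as
//     __kmpc_omp_taskwait( &loc, gtid );
// after which all tasks generated by the current task have completed.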
1482
1483
1484//-------------------------------------------------
1485// __kmpc_omp_taskyield: switch to a different task
1486
1487kmp_int32
1488__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1489{
1490 kmp_taskdata_t * taskdata;
1491 kmp_info_t * thread;
1492 int thread_finished = FALSE;
1493
Jonathan Peyton45be4502015-08-11 21:36:41 +00001494 KMP_COUNT_BLOCK(OMP_TASKYIELD);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001495 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001496
Jim Cownie5e8470a2013-09-27 10:38:44 +00001497 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1498 gtid, loc_ref, end_part) );
1499
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001500 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001501 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1502
1503 thread = __kmp_threads[ gtid ];
1504 taskdata = thread -> th.th_current_task;
1505 // Should we model this as a task wait or not?
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001506 // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001507#if USE_ITT_BUILD
1508 // Note: These values are used by ITT events as well.
1509#endif /* USE_ITT_BUILD */
1510 taskdata->td_taskwait_counter += 1;
1511 taskdata->td_taskwait_ident = loc_ref;
1512 taskdata->td_taskwait_thread = gtid + 1;
1513
1514#if USE_ITT_BUILD
1515 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1516 if ( itt_sync_obj != NULL )
1517 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1518#endif /* USE_ITT_BUILD */
1519 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001520 kmp_task_team_t * task_team = thread->th.th_task_team;
1521 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001522 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001523 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1524 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1525 }
1526 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001527 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001528#if USE_ITT_BUILD
1529 if ( itt_sync_obj != NULL )
1530 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1531#endif /* USE_ITT_BUILD */
1532
1533 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001534 // Debugger: The taskwait is completed. Location remains, but thread is negated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001535 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1536 }
1537
1538 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1539 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1540
1541 return TASK_CURRENT_NOT_QUEUED;
1542}
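
// Usage sketch (assumption): `#pragma omp taskyield` is expected to lower to
//     __kmpc_omp_taskyield( &loc, gtid, 0 );
// Note that end_part is only reported in the trace output above; the scheduling
// logic itself does not consult it.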
1543
1544
1545#if OMP_40_ENABLED
1546//-------------------------------------------------------------------------------------
1547// __kmpc_taskgroup: Start a new taskgroup
1548
1549void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001550__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001551{
1552 kmp_info_t * thread = __kmp_threads[ gtid ];
1553 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1554 kmp_taskgroup_t * tg_new =
1555 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1556 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1557 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001558 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001559 tg_new->parent = taskdata->td_taskgroup;
1560 taskdata->td_taskgroup = tg_new;
1561}
1562
1563
1564//-------------------------------------------------------------------------------------
1565// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1566// and its descendants are complete
1567
1568void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001569__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001570{
1571 kmp_info_t * thread = __kmp_threads[ gtid ];
1572 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1573 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1574 int thread_finished = FALSE;
1575
1576 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1577 KMP_DEBUG_ASSERT( taskgroup != NULL );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001578 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579
1580 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1581#if USE_ITT_BUILD
1582 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1583 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1584 if ( itt_sync_obj != NULL )
1585 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1586#endif /* USE_ITT_BUILD */
1587
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001588#if OMP_45_ENABLED
Jonathan Peyton61118492016-05-20 19:03:38 +00001589 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001590#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001591 if ( ! taskdata->td_flags.team_serial )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001592#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001593 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001594 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001595 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001596 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1597 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001598 }
1599 }
1600
1601#if USE_ITT_BUILD
1602 if ( itt_sync_obj != NULL )
1603 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1604#endif /* USE_ITT_BUILD */
1605 }
1606 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1607
1608 // Restore parent taskgroup for the current task
1609 taskdata->td_taskgroup = taskgroup->parent;
1610 __kmp_thread_free( thread, taskgroup );
1611
1612 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1613}
1614#endif
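
// Usage sketch (assumption): a `#pragma omp taskgroup` region is expected to be
// bracketed by the two entry points above, e.g.
//     __kmpc_taskgroup( &loc, gtid );
//     /* body spawns tasks that are accounted for in taskgroup->count */
//     __kmpc_end_taskgroup( &loc, gtid );  // returns once count reaches 0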
1615
1616
1617//------------------------------------------------------
1618// __kmp_remove_my_task: remove a task from my own deque
1619
1620static kmp_task_t *
1621__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1622 kmp_int32 is_constrained )
1623{
1624 kmp_task_t * task;
1625 kmp_taskdata_t * taskdata;
1626 kmp_thread_data_t *thread_data;
1627 kmp_uint32 tail;
1628
1629 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1630 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1631
1632 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1633
1634 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1635 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1636 thread_data->td.td_deque_tail) );
1637
1638 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1639 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1640 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1641 thread_data->td.td_deque_tail) );
1642 return NULL;
1643 }
1644
1645 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1646
1647 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1648 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1649 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1650 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1651 thread_data->td.td_deque_tail) );
1652 return NULL;
1653 }
1654
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001655 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001656 taskdata = thread_data -> td.td_deque[ tail ];
1657
Jonathan Peyton8cb45c82016-06-13 17:51:59 +00001658 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001659 // we need to check if the candidate obeys task scheduling constraint:
1660 // only a child of the current task can be scheduled
1661 kmp_taskdata_t * current = thread->th.th_current_task;
1662 kmp_int32 level = current->td_level;
1663 kmp_taskdata_t * parent = taskdata->td_parent;
1664 while ( parent != current && parent->td_level > level ) {
1665 parent = parent->td_parent; // check generation up to the level of the current task
1666 KMP_DEBUG_ASSERT(parent != NULL);
1667 }
1668 if ( parent != current ) {
Jonathan Peytonb6f0f522016-06-09 18:51:17 +00001669 // If the tail task is not a child, then no other child can appear in the deque.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1671 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1672 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1673 thread_data->td.td_deque_tail) );
1674 return NULL;
1675 }
1676 }
1677
1678 thread_data -> td.td_deque_tail = tail;
1679 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1680
1681 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1682
1683 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1684 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1685 thread_data->td.td_deque_tail) );
1686
1687 task = KMP_TASKDATA_TO_TASK( taskdata );
1688 return task;
1689}
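
// Worked example of the tail arithmetic above (the masking assumes the deque size
// is a power of two): with, say, 256 slots the mask is 255, so a tail of 0 wraps
// to (0 - 1) & 255 == 255. Removal therefore walks the circular buffer backwards
// from the most recently pushed entry.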
1690
1691
1692//-----------------------------------------------------------
1693// __kmp_steal_task: remove a task from another thread's deque
1694// Assume that calling thread has already checked existence of
1695// task_team thread_data before calling this routine.
1696
1697static kmp_task_t *
1698__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1699 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1700 kmp_int32 is_constrained )
1701{
1702 kmp_task_t * task;
1703 kmp_taskdata_t * taskdata;
1704 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001705 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001706
1707 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1708
1709 threads_data = task_team -> tt.tt_threads_data;
1710 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1711
1712 victim_tid = victim->th.th_info.ds.ds_tid;
1713 victim_td = & threads_data[ victim_tid ];
1714
1715 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1716 "head=%u tail=%u\n",
1717 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1718 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1719
1720 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1721 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1722 {
1723 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1724 "ntasks=%d head=%u tail=%u\n",
1725 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1726 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1727 return NULL;
1728 }
1729
1730 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1731
1732 // Check again after we acquire the lock
1733 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1734 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1735 {
1736 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1737 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1738 "ntasks=%d head=%u tail=%u\n",
1739 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1740 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1741 return NULL;
1742 }
1743
1744 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1745
1746 if ( !is_constrained ) {
1747 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001748 KMP_ASSERT(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001749 // Bump head pointer and Wrap.
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001750 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK(victim_td->td);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751 } else {
1752 // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001753 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(victim_td->td); // Wrap index.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001754 taskdata = victim_td -> td.td_deque[ tail ];
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001755 KMP_ASSERT(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001756 // we need to check if the candidate obeys task scheduling constraint:
1757 // only a child of the current task can be scheduled
1758 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1759 kmp_int32 level = current->td_level;
1760 kmp_taskdata_t * parent = taskdata->td_parent;
1761 while ( parent != current && parent->td_level > level ) {
1762 parent = parent->td_parent; // check generation up to the level of the current task
1763 KMP_DEBUG_ASSERT(parent != NULL);
1764 }
Jonathan Peyton8cb45c82016-06-13 17:51:59 +00001765 if ( parent != current && (taskdata->td_flags.tiedness == TASK_TIED) ) { // untied is always allowed to be stolen
Jim Cownie5e8470a2013-09-27 10:38:44 +00001766 // If the tail task is not a child, then no other children can appear in the deque.
1767 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1768 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1769 "ntasks=%d head=%u tail=%u\n",
1770 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1771 task_team, victim_td->td.td_deque_ntasks,
1772 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1773 return NULL;
1774 }
1775 victim_td -> td.td_deque_tail = tail;
1776 }
1777 if (*thread_finished) {
1778 // We need to un-mark this victim as a finished victim. This must be done before
1779 // releasing the lock, or else other threads (starting with the master victim)
1780 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001781 kmp_uint32 count;
1782
1783 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001784
1785 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1786 gtid, count + 1, task_team) );
1787
1788 *thread_finished = FALSE;
1789 }
1790 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1791
1792 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1793
Jonathan Peyton45be4502015-08-11 21:36:41 +00001794 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001795 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001796 "ntasks=%d head=%u tail=%u\n",
1797 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1798 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1799 victim_td->td.td_deque_tail) );
1800
1801 task = KMP_TASKDATA_TO_TASK( taskdata );
1802 return task;
1803}
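
// Design note (as reflected in the code above): an unconstrained steal takes the
// victim's task from the head of its deque, the opposite end from where the owner
// pops in __kmp_remove_my_task, while a constrained steal inspects the tail so the
// walk up td_parent can confirm the candidate is a descendant of the stealing
// thread's current task before it is removed.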
1804
1805
1806//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001807// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808// is satisfied (return true) or there are none left (return false).
1809// final_spin is TRUE if this is the spin at the release barrier.
1810// thread_finished indicates whether the thread is finished executing all
1811// the tasks it has on its deque, and is at the release barrier.
1812// spinner is the location on which to spin.
1813// spinner == NULL means only execute a single task and return.
1814// checker is the value to check to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001815template <class C>
Jonathan Peyton61118492016-05-20 19:03:38 +00001816static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001817 int *thread_finished
1818 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001819{
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001820 kmp_task_team_t * task_team = thread->th.th_task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001821 kmp_thread_data_t * threads_data;
1822 kmp_task_t * task;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001823 kmp_info_t * other_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001824 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1825 volatile kmp_uint32 * unfinished_threads;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001826 kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001827
1828 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1829 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1830
Jonathan Peyton54127982015-11-04 21:37:48 +00001831 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001833 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001834 gtid, final_spin, *thread_finished) );
1835
1836 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1837 KMP_DEBUG_ASSERT( threads_data != NULL );
1838
1839 nthreads = task_team -> tt.tt_nproc;
1840 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001841#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001842 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1843#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001844 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001845#endif
Paul Osmialowski9cc353e2016-06-01 09:59:26 +00001846 KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001847
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001848 while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
1849 while (1) { // Inner loop to find a task and execute it
1850 task = NULL;
1851 if (use_own_tasks) { // check on own queue first
1852 task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001853 }
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001854 if ((task == NULL) && (nthreads > 1)) { // Steal a task
1855 int asleep = 1;
1856 use_own_tasks = 0;
1857 // Try to steal from the last place I stole from successfully.
1858 if (victim == -2) { // haven't stolen anything yet
1859 victim = threads_data[tid].td.td_deque_last_stolen;
1860 if (victim != -1) // if we have a last stolen from victim, get the thread
1861 other_thread = threads_data[victim].td.td_thr;
1862 }
1863 if (victim != -1) { // found last victim
1864 asleep = 0;
1865 }
1866 else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
1867 do { // Find a different thread to steal work from.
1868 // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
1869 // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
1870 victim = __kmp_get_random(thread) % (nthreads - 1);
1871 if (victim >= tid) {
1872 ++victim; // Adjusts random distribution to exclude self
1873 }
1874 // Found a potential victim
1875 other_thread = threads_data[victim].td.td_thr;
1876 // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
1877 // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
1878 // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
1879 // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
1880 // do not sleep at the separate tasking barrier, so this isn't a problem.
1881 asleep = 0;
1882 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1883 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1884 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
1885 asleep = 1;
1886 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1887 // A sleeping thread should not have any tasks on its queue. There is a slight
1888 // possibility that it resumes, steals a task from another thread, which spawns more
1889 // tasks, all in the time that it takes this thread to check => don't write an assertion
1890 // that the victim's queue is empty. Try stealing from a different thread.
1891 }
1892 } while (asleep);
1893 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001894
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001895 if (!asleep) {
1896 // We have a victim to try to steal from
1897 task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
1898 }
1899 if (task != NULL) { // set last stolen to victim
1900 if (threads_data[tid].td.td_deque_last_stolen != victim) {
1901 threads_data[tid].td.td_deque_last_stolen = victim;
1902 // The pre-refactored code did not try more than 1 successful new victim,
1903 // unless the last one generated more local tasks; new_victim keeps track of this
1904 new_victim = 1;
1905 }
1906 }
1907 else { // No tasks found; unset last_stolen
1908 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
1909 victim = -2; // no successful victim found
1910 }
1911 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001912
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001913 if (task == NULL) // break out of tasking loop
1914 break;
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001915
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001916 // Found a task; execute it
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917#if USE_ITT_BUILD && USE_ITT_NOTIFY
1918 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001919 if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
1920 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001921 }
1922 __kmp_itt_task_starting( itt_sync_obj );
1923 }
1924#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1925 __kmp_invoke_task( gtid, task, current_task );
1926#if USE_ITT_BUILD
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001927 if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928#endif /* USE_ITT_BUILD */
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001929 // If this thread is only partway through the barrier and the condition is met, then return now,
1930 // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
1931 // in the barrier, waiting to be released, we know that the termination condition will not be
1932 // satisfied, so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001933 if (flag == NULL || (!final_spin && flag->done_check())) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001934 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001935 return TRUE;
1936 }
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001937 if (thread->th.th_task_team == NULL) {
1938 break;
1939 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001940 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001941 // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
1942 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
1943 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
1944 use_own_tasks = 1;
1945 new_victim = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001946 }
1947 }
1948
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001949 // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001950#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001951 // The work queue may be empty but there might be proxy tasks still executing
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001952 if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001953#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001954 if (final_spin)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001955#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001956 {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001957 // First, decrement the #unfinished threads, if that has not already been done. This decrement
1958 // might be to the spin location, and result in the termination condition being satisfied.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001960 kmp_uint32 count;
1961
1962 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001963 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 gtid, count, task_team) );
1965 *thread_finished = TRUE;
1966 }
1967
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001968 // It is now unsafe to reference thread->th.th_team !!!
1969 // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
1970 // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001972 if (flag != NULL && flag->done_check()) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001973 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001974 return TRUE;
1975 }
1976 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001977
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001978 // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
1979 if (thread->th.th_task_team == NULL) {
1980 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
1981 return FALSE;
1982 }
1983
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001984#if OMP_45_ENABLED
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001985 // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
1986 // tasks from own queue
1987 if (nthreads == 1)
1988 use_own_tasks = 1;
1989 else
1990#endif
1991 {
1992 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
1993 return FALSE;
1994 }
1995 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996}
1997
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001998int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1999 int *thread_finished
2000 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2001{
2002 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2003 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2004}
2005
2006int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2007 int *thread_finished
2008 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2009{
2010 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2011 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2012}
2013
2014int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2015 int *thread_finished
2016 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2017{
2018 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2019 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2020}
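
// Design note: the three wrappers above exist to instantiate
// __kmp_execute_tasks_template for the concrete flag types (kmp_flag_32,
// kmp_flag_64, kmp_flag_oncore). The spin/termination test is delegated to
// flag->done_check(), so one task-execution loop serves every barrier flavor.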
2021
2022
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023
2024//-----------------------------------------------------------------------------
2025// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2026// next barrier so they can assist in executing enqueued tasks.
2027// First thread in allocates the task team atomically.
2028
2029static void
2030__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2031{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002032 kmp_thread_data_t *threads_data;
2033 int nthreads, i, is_init_thread;
2034
2035 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2036 __kmp_gtid_from_thread( this_thr ) ) );
2037
2038 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002039 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002040
2041 nthreads = task_team->tt.tt_nproc;
2042 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002043 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002044
2045 // Allocate or increase the size of threads_data if necessary
2046 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2047
2048 if (!is_init_thread) {
2049 // Some other thread already set up the array.
2050 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2051 __kmp_gtid_from_thread( this_thr ) ) );
2052 return;
2053 }
2054 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2055 KMP_DEBUG_ASSERT( threads_data != NULL );
2056
2057 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2058 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2059 {
2060 // Release any threads sleeping at the barrier, so that they can steal
2061 // tasks and execute them. In extra barrier mode, tasks do not sleep
2062 // at the separate tasking barrier, so this isn't a problem.
2063 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002064 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065 kmp_info_t *thread = threads_data[i].td.td_thr;
2066
2067 if (i == this_thr->th.th_info.ds.ds_tid) {
2068 continue;
2069 }
2070 // Since we haven't locked the thread's suspend mutex lock at this
2071 // point, there is a small window where a thread might be putting
2072 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002073 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002074 // to see if other threads are sleeping (using the same random
2075 // mechanism that is used for task stealing) and awakens them if
2076 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002077 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078 {
2079 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2080 __kmp_gtid_from_thread( this_thr ),
2081 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002082 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002083 }
2084 else {
2085 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2086 __kmp_gtid_from_thread( this_thr ),
2087 __kmp_gtid_from_thread( thread ) ) );
2088 }
2089 }
2090 }
2091
2092 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2093 __kmp_gtid_from_thread( this_thr ) ) );
2094}
2095
2096
2097/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002098/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2100 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2101 * After a child thread checks into a barrier and calls __kmp_release() from
2102 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2103 * longer assume that the kmp_team_t structure is intact (at any moment, the
2104 * master thread may exit the barrier code and free the team data structure,
2105 * and return the threads to the thread pool).
2106 *
2107 * This does not work with the tasking code, as the thread is still
2108 * expected to participate in the execution of any tasks that may have been
2109 * spawned by a member of the team, and the thread still needs access
2110 * to each thread in the team, so that it can steal work from it.
2111 *
2112 * Enter the existence of the kmp_task_team_t struct. It employs a reference
2113 * counting mechanism, and is allocated by the master thread before calling
2114 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2115 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2116 * of the kmp_task_team_t structs for consecutive barriers can overlap
2117 * (and will, unless the master thread is the last thread to exit the barrier
2118 * release phase, which is not typical).
2119 *
2120 * The existence of such a struct is useful outside the context of tasking,
2121 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2122 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2123 * libraries.
2124 *
2125 * We currently use the existence of the threads array as an indicator that
2126 * tasks were spawned since the last barrier. If the structure is to be
2127 * useful outside the context of tasking, then this will have to change, but
2128 * not settting the field minimizes the performance impact of tasking on
2129 * barriers, when no explicit tasks were spawned (pushed, actually).
2130 */
2131
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002132
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2134// Lock for task team data structures
2135static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2136
2137
2138//------------------------------------------------------------------------------
2139// __kmp_alloc_task_deque:
2140// Allocates a task deque for a particular thread, and initializes the necessary
2141// data structures relating to the deque. This only happens once per thread
2142// per task team since task teams are recycled.
2143// No lock is needed during allocation since each thread allocates its own
2144// deque.
2145
2146static void
2147__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2148{
2149 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2150 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2151
2152 // Initialize last stolen task field to "none"
2153 thread_data -> td.td_deque_last_stolen = -1;
2154
2155 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2156 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2157 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2158
2159 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002160 __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002161 // Allocate space for task deque, and zero the deque
2162 // Cannot use __kmp_thread_calloc() because threads not around for
2163 // kmp_reap_task_team( ).
2164 thread_data -> td.td_deque = (kmp_taskdata_t **)
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002165 __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2166 thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002167}
2168
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002169//------------------------------------------------------------------------------
2170// __kmp_realloc_task_deque:
2171// Re-allocates a task deque for a particular thread, copies the content from the old deque
2172// and adjusts the necessary data structures relating to the deque.
2173// This operation must be done with the deque_lock held
2174
2175static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2176{
2177 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2178 kmp_int32 new_size = 2 * size;
2179
2180 KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
2181 __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
2182
2183 kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
2184
2185 int i,j;
2186 for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
2187 new_deque[j] = thread_data->td.td_deque[i];
2188
2189 __kmp_free(thread_data->td.td_deque);
2190
2191 thread_data -> td.td_deque_head = 0;
2192 thread_data -> td.td_deque_tail = size;
2193 thread_data -> td.td_deque = new_deque;
2194 thread_data -> td.td_deque_size = new_size;
2195}
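
// Worked example of the copy above: for a full deque of size 4 with head == 2
// (logical order d[2], d[3], d[0], d[1]), the loop writes those entries into
// new_deque[0..3] in that order; afterwards head == 0, tail == 4, and the doubled
// buffer of size 8 has free slots again.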
Jim Cownie5e8470a2013-09-27 10:38:44 +00002196
2197//------------------------------------------------------------------------------
2198// __kmp_free_task_deque:
2199// Deallocates a task deque for a particular thread.
2200// Happens at library deallocation so don't need to reset all thread data fields.
2201
2202static void
2203__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2204{
2205 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2206
2207 if ( thread_data -> td.td_deque != NULL ) {
2208 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2209 __kmp_free( thread_data -> td.td_deque );
2210 thread_data -> td.td_deque = NULL;
2211 }
2212 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2213
2214#ifdef BUILD_TIED_TASK_STACK
2215 // GEH: Figure out what to do here for td_susp_tied_tasks
2216 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2217 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2218 }
2219#endif // BUILD_TIED_TASK_STACK
2220}
2221
2222
2223//------------------------------------------------------------------------------
2224// __kmp_realloc_task_threads_data:
2225// Allocates a threads_data array for a task team, either by allocating an initial
2226// array or enlarging an existing array. Only the first thread to get the lock
2227// allocates or enlarges the array and re-initializes the array elements.
2228// That thread returns "TRUE", the rest return "FALSE".
2229// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2230// The current size is given by task_team -> tt.tt_max_threads.
2231
2232static int
2233__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2234{
2235 kmp_thread_data_t ** threads_data_p;
2236 kmp_int32 nthreads, maxthreads;
2237 int is_init_thread = FALSE;
2238
2239 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2240 // Already reallocated and initialized.
2241 return FALSE;
2242 }
2243
2244 threads_data_p = & task_team -> tt.tt_threads_data;
2245 nthreads = task_team -> tt.tt_nproc;
2246 maxthreads = task_team -> tt.tt_max_threads;
2247
2248 // All threads must lock when they encounter the first task of the implicit task
2249 // region to make sure threads_data fields are (re)initialized before used.
2250 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2251
2252 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2253 // first thread to enable tasking
2254 kmp_team_t *team = thread -> th.th_team;
2255 int i;
2256
2257 is_init_thread = TRUE;
2258 if ( maxthreads < nthreads ) {
2259
2260 if ( *threads_data_p != NULL ) {
2261 kmp_thread_data_t *old_data = *threads_data_p;
2262 kmp_thread_data_t *new_data = NULL;
2263
2264 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2265 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2266 __kmp_gtid_from_thread( thread ), task_team,
2267 nthreads, maxthreads ) );
2268 // Reallocate threads_data to have more elements than current array
2269 // Cannot use __kmp_thread_realloc() because threads not around for
2270 // kmp_reap_task_team( ). Note all new array entries are initialized
2271 // to zero by __kmp_allocate().
2272 new_data = (kmp_thread_data_t *)
2273 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2274 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002275 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002276 (void *) old_data,
2277 maxthreads * sizeof(kmp_taskdata_t *) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002278
2279#ifdef BUILD_TIED_TASK_STACK
2280 // GEH: Figure out if this is the right thing to do
2281 for (i = maxthreads; i < nthreads; i++) {
2282 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2283 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2284 }
2285#endif // BUILD_TIED_TASK_STACK
2286 // Install the new data and free the old data
2287 (*threads_data_p) = new_data;
2288 __kmp_free( old_data );
2289 }
2290 else {
2291 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2292 "threads data for task_team %p, size = %d\n",
2293 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2294 // Make the initial allocate for threads_data array, and zero entries
2295 // Cannot use __kmp_thread_calloc() because threads not around for
2296 // kmp_reap_task_team( ).
2297 *threads_data_p = (kmp_thread_data_t *)
2298 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2299#ifdef BUILD_TIED_TASK_STACK
2300 // GEH: Figure out if this is the right thing to do
2301 for (i = 0; i < nthreads; i++) {
2302 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2303 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2304 }
2305#endif // BUILD_TIED_TASK_STACK
2306 }
2307 task_team -> tt.tt_max_threads = nthreads;
2308 }
2309 else {
2310 // If array has (more than) enough elements, go ahead and use it
2311 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2312 }
2313
2314 // initialize threads_data pointers back to thread_info structures
2315 for (i = 0; i < nthreads; i++) {
2316 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2317 thread_data -> td.td_thr = team -> t.t_threads[i];
2318
2319 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2320 // The last stolen field survives across teams / barrier, and the number
2321 // of threads may have changed. It's possible (likely?) that a new
2322 // parallel region will exhibit the same behavior as the previous region.
2323 thread_data -> td.td_deque_last_stolen = -1;
2324 }
2325 }
2326
2327 KMP_MB();
2328 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2329 }
2330
2331 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2332 return is_init_thread;
2333}
2334
2335
2336//------------------------------------------------------------------------------
2337// __kmp_free_task_threads_data:
2338// Deallocates a threads_data array for a task team, including any attached
2339// tasking deques. Only occurs at library shutdown.
2340
2341static void
2342__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2343{
2344 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2345 if ( task_team -> tt.tt_threads_data != NULL ) {
2346 int i;
2347 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2348 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2349 }
2350 __kmp_free( task_team -> tt.tt_threads_data );
2351 task_team -> tt.tt_threads_data = NULL;
2352 }
2353 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2354}
2355
2356
2357//------------------------------------------------------------------------------
2358// __kmp_allocate_task_team:
2359// Allocates a task team associated with a specific team, taking it from
2360// the global task team free list if possible. Also initializes data structures.
2361
2362static kmp_task_team_t *
2363__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2364{
2365 kmp_task_team_t *task_team = NULL;
2366 int nthreads;
2367
2368 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2369 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2370
2371 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2372 // Take a task team from the task team pool
2373 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2374 if (__kmp_free_task_teams != NULL) {
2375 task_team = __kmp_free_task_teams;
2376 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2377 task_team -> tt.tt_next = NULL;
2378 }
2379 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2380 }
2381
2382 if (task_team == NULL) {
2383 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2384 "task team for team %p\n",
2385 __kmp_gtid_from_thread( thread ), team ) );
2386 // Allocate a new task team if one is not available.
2387 // Cannot use __kmp_thread_malloc() because threads not around for
2388 // kmp_reap_task_team( ).
2389 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2390 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2391 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2392 //task_team -> tt.tt_max_threads = 0;
2393 //task_team -> tt.tt_next = NULL;
2394 }
2395
2396 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002397#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002398 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2399#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002400 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2401
Jim Cownie5e8470a2013-09-27 10:38:44 +00002402 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2403 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404
Jonathan Peyton54127982015-11-04 21:37:48 +00002405 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2406 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002407 return task_team;
2408}
2409
2410
2411//------------------------------------------------------------------------------
2412// __kmp_free_task_team:
2413// Frees the task team associated with a specific thread, and adds it
2414// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002415
Jonathan Peyton54127982015-11-04 21:37:48 +00002416void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002417__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2418{
2419 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2420 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2421
Jim Cownie5e8470a2013-09-27 10:38:44 +00002422 // Put task team back on free list
2423 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2424
2425 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2426 task_team -> tt.tt_next = __kmp_free_task_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002427 TCW_PTR(__kmp_free_task_teams, task_team);
2428
2429 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2430}
2431
2432
2433//------------------------------------------------------------------------------
2434// __kmp_reap_task_teams:
2435// Free all the task teams on the task team free list.
2436// Should only be done during library shutdown.
2437// Cannot do anything that needs a thread structure or gtid since they are already gone.
2438
2439void
2440__kmp_reap_task_teams( void )
2441{
2442 kmp_task_team_t *task_team;
2443
2444 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2445 // Free all task_teams on the free list
2446 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2447 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2448 __kmp_free_task_teams = task_team -> tt.tt_next;
2449 task_team -> tt.tt_next = NULL;
2450
2451 // Free threads_data if necessary
2452 if ( task_team -> tt.tt_threads_data != NULL ) {
2453 __kmp_free_task_threads_data( task_team );
2454 }
2455 __kmp_free( task_team );
2456 }
2457 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2458 }
2459}
2460
Jim Cownie5e8470a2013-09-27 10:38:44 +00002461//------------------------------------------------------------------------------
2462// __kmp_wait_to_unref_task_teams:
2463// Some threads could still be in the fork barrier release code, possibly
2464// trying to steal tasks. Wait for each thread to unreference its task team.
2465//
2466void
2467__kmp_wait_to_unref_task_teams(void)
2468{
2469 kmp_info_t *thread;
2470 kmp_uint32 spins;
2471 int done;
2472
2473 KMP_INIT_YIELD( spins );
2474
Jim Cownie5e8470a2013-09-27 10:38:44 +00002475 for (;;) {
2476 done = TRUE;
2477
2478 // TODO: GEH - this may be wrong because some sync would be necessary
2479 // in case threads are added to the pool during the traversal.
2480 // Need to verify that lock for thread pool is held when calling
2481 // this routine.
2482 for (thread = (kmp_info_t *)__kmp_thread_pool;
2483 thread != NULL;
2484 thread = thread->th.th_next_pool)
2485 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002486#if KMP_OS_WINDOWS
2487 DWORD exit_val;
2488#endif
2489 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2490 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2491 __kmp_gtid_from_thread( thread ) ) );
2492 continue;
2493 }
2494#if KMP_OS_WINDOWS
2495 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2496 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002497 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002498 continue;
2499 }
2500#endif
2501
2502 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2503
2504 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2505 __kmp_gtid_from_thread( thread ) ) );
2506
2507 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002508 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002509 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002510 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002511 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2512 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002513 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002514 }
2515 }
2516 }
2517 if (done) {
2518 break;
2519 }
2520
2521 // If we are oversubscribed,
2522 // or have waited a bit (and library mode is throughput), yield.
2523 // Pause is in the following code.
2524 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2525 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2526 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002527}
2528
2529
2530//------------------------------------------------------------------------------
2531// __kmp_task_team_setup: Create a task_team for the current team, but use
2532// an already created, unused one if it exists.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002533void
Jonathan Peyton54127982015-11-04 21:37:48 +00002534__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002535{
2536 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2537
Jonathan Peyton54127982015-11-04 21:37:48 +00002538 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2539 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
Jonathan Peyton61118492016-05-20 19:03:38 +00002540 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002541 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002542 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002543 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002544 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002545 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002546
Jonathan Peyton61118492016-05-20 19:03:38 +00002547 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
Jonathan Peyton54127982015-11-04 21:37:48 +00002548 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2549 // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
Jonathan Peyton61118492016-05-20 19:03:38 +00002550 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
Jonathan Peyton54127982015-11-04 21:37:48 +00002551 // serialized teams.
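    // An informal sketch of the double-buffering set up here (a summary, not new behavior):
    // th_task_state alternates 0,1,0,... across consecutive barriers, so successive parallel
    // regions alternate between t_task_team[0] and t_task_team[1]. The master prepares
    // t_task_team[1 - th_task_state] for the upcoming region while workers may still be
    // draining the current one; __kmp_task_team_sync() then flips the parity after the release.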
Jonathan Peytone1dad192015-11-30 20:05:13 +00002552 if (team->t.t_nproc > 1) {
2553 int other_team = 1 - this_thr->th.th_task_state;
2554 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2555 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2556 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2557 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2558 ((team != NULL) ? team->t.t_id : -1), other_team ));
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002559 }
Jonathan Peytone1dad192015-11-30 20:05:13 +00002560 else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2561 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2562 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2563 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2564 TCW_4(task_team->tt.tt_found_tasks, FALSE);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002565#if OMP_45_ENABLED
Jonathan Peytone1dad192015-11-30 20:05:13 +00002566 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2567#endif
2568 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2569 TCW_4(task_team->tt.tt_active, TRUE );
2570 }
2571 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2572 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2573 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2574 ((team != NULL) ? team->t.t_id : -1), other_team ));
2575 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002576 }
2577}
2578
2579
2580//------------------------------------------------------------------------------
2581// __kmp_task_team_sync: Propagation of task team data from team to threads
2582// which happens just after the release phase of a team barrier. This may be
2583// called by any thread, but only for teams with # threads > 1.
2584
2585void
2586__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2587{
2588 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2589
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002590 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002591 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002592 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2593 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jonathan Peyton54127982015-11-04 21:37:48 +00002594 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002595 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2596 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002597}
2598
2599
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002600//--------------------------------------------------------------------------------------------
2601// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
Jonathan Peyton54127982015-11-04 21:37:48 +00002602// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2603// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2604// optionally as the last argument. When wait is zero, master thread does not wait for
2605// unfinished_threads to reach 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002606void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002607__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002608 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jonathan Peyton54127982015-11-04 21:37:48 +00002609 , int wait)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002610{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002611 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002612
2613 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2614 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2615
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002616 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002617 if (wait) {
2618 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2619 __kmp_gtid_from_thread(this_thr), task_team));
2620 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2621 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2622 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2623 flag.wait(this_thr, TRUE
2624 USE_ITT_BUILD_ARG(itt_sync_obj));
2625 }
2626 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2627 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2628 "setting active to false, setting local and team's pointer to NULL\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002629 __kmp_gtid_from_thread(this_thr), task_team));
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002630#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002631 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2632 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2633#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002634 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002635#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002636 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2637 KMP_MB();
2638
2639 TCW_PTR(this_thr->th.th_task_team, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002640 }
2641}
2642
2643
2644//------------------------------------------------------------------------------
2645// __kmp_tasking_barrier:
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002646// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002647// Internal function to execute all tasks prior to a regular barrier or a
2648// join barrier. It is a full barrier itself, which unfortunately turns
2649// regular barriers into double barriers and join barriers into 1 1/2
2650// barriers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002651void
2652__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2653{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002654 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002655 int flag = FALSE;
2656 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2657
2658#if USE_ITT_BUILD
2659 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2660#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002661 kmp_flag_32 spin_flag(spin, 0U);
2662 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2663 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002664#if USE_ITT_BUILD
2665 // TODO: What about itt_sync_obj??
2666 KMP_FSYNC_SPIN_PREPARE( spin );
2667#endif /* USE_ITT_BUILD */
2668
2669 if( TCR_4(__kmp_global.g.g_done) ) {
2670 if( __kmp_global.g.g_abort )
2671 __kmp_abort_thread( );
2672 break;
2673 }
2674 KMP_YIELD( TRUE ); // GH: We always yield here
2675 }
2676#if USE_ITT_BUILD
2677 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2678#endif /* USE_ITT_BUILD */
2679}
2680
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002681
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002682#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002683
2684/* __kmp_give_task puts a task into a given thread's queue if:
Jonathan Peytonff684e42016-02-11 22:58:29 +00002685 - the queue for that thread was created
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002686 - there's space in that queue
2687
2688 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2689 */
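// Informal note on the pass argument (a summary of the logic below, not new behavior):
// the caller sweeps the team repeatedly and doubles pass after each full sweep, so a full
// deque is only grown (via __kmp_realloc_task_deque) once its size divided by
// INITIAL_TASK_DEQUE_SIZE drops below pass; until then the task is simply offered to the
// next thread in the sweep.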
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002690static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002691{
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002692 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002693 kmp_task_team_t * task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002694
2695 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2696
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002697 // If task_team is NULL something went really bad...
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002698 KMP_DEBUG_ASSERT( task_team != NULL );
2699
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002700 bool result = false;
2701 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2702
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002703 if (thread_data -> td.td_deque == NULL ) {
2704 // There's no queue in this thread, go find another one
2705 // We're guaranteed that at least one thread has a queue
2706 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2707 return result;
2708 }
2709
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002710 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002711 {
2712 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002713
2714 // if this deque is bigger than the pass ratio give a chance to another thread
2715 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
2716
2717 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2718 __kmp_realloc_task_deque(thread,thread_data);
2719
2720 } else {
2721
2722 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2723
2724 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2725 {
2726 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2727
2728 // if this deque is bigger than the pass ratio give a chance to another thread
2729 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
2730 goto release_and_exit;
2731
2732 __kmp_realloc_task_deque(thread,thread_data);
2733 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002734 }
2735
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002736 // lock is held here, and there is space in the deque
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002737
2738 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2739 // Wrap index.
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002740 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002741 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2742
2743 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002744 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002745
2746release_and_exit:
2747 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2748
2749 return result;
2750}
2751
2752
2753/* The finish of a proxy task is divided into two pieces:
2754 - the top half is the one that can be done from a thread outside the team
2755 - the bottom half must be run from a thread within the team
2756
2757 In order to run the bottom half the task gets queued back into one of the threads of the team.
2758 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2759 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
2760 - things that can be run before queuing the bottom half
2761 - things that must be run after queuing the bottom half
2762
2763 This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
2764 we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2765*/
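// Informal ordering sketch (mirrors __kmpc_proxy_task_completed_ooo below):
//   __kmp_first_top_half_finish_proxy(td);   // mark complete, add an imaginary child
//   __kmp_give_task(...);                    // queue the bottom half to a team thread
//   __kmp_second_top_half_finish_proxy(td);  // decrement parent counter, drop imaginary child
// The bottom half spins on td_incomplete_child_tasks before releasing dependences and
// freeing the task, so it cannot free the task before the second top half has run.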
2766
2767static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2768{
2769 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2770 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2771 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2772 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2773
2774 taskdata -> td_flags.complete = 1; // mark the task as completed
2775
2776 if ( taskdata->td_taskgroup )
2777 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2778
2779 // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
Paul Osmialowski52bef532016-05-07 00:00:00 +00002780 TCI_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002781}
2782
2783static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2784{
2785 kmp_int32 children = 0;
2786
2787 // Predecrement simulated by "- 1" calculation
2788 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2789 KMP_DEBUG_ASSERT( children >= 0 );
2790
2791 // Remove the imaginary child
Paul Osmialowski52bef532016-05-07 00:00:00 +00002792 TCD_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002793}
2794
2795static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2796{
2797 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2798 kmp_info_t * thread = __kmp_threads[ gtid ];
2799
2800 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2801 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2802
2803 // We need to wait to make sure the top half is finished
2804 // Spinning here should be ok as this should happen quickly
2805 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2806
2807 __kmp_release_deps(gtid,taskdata);
2808 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2809}
2810
2811/*!
2812@ingroup TASKING
2813@param gtid Global Thread ID of encountering thread
2814@param ptask Task whose execution is completed
2815
2816Execute the completion of a proxy task from a thread that is part of the team. The top and bottom halves are run directly.
2817*/
2818void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2819{
2820 KMP_DEBUG_ASSERT( ptask != NULL );
2821 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2822 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2823
2824 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2825
2826 __kmp_first_top_half_finish_proxy(taskdata);
2827 __kmp_second_top_half_finish_proxy(taskdata);
2828 __kmp_bottom_half_finish_proxy(gtid,ptask);
2829
2830 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2831}
2832
2833/*!
2834@ingroup TASKING
2835@param ptask Task whose execution is completed
2836
2837Execute the completion of a proxy task from a thread that does not necessarily belong to the team.
2838*/
2839void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2840{
2841 KMP_DEBUG_ASSERT( ptask != NULL );
2842 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2843
2844 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2845
2846 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2847
2848 __kmp_first_top_half_finish_proxy(taskdata);
2849
Jonathan Peytonff684e42016-02-11 22:58:29 +00002850 // Enqueue the task so the bottom half of its completion runs on a thread within the corresponding team
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002851 kmp_team_t * team = taskdata->td_team;
2852 kmp_int32 nthreads = team->t.t_nproc;
2853 kmp_info_t *thread;
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002854
2855 //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2856 kmp_int32 start_k = 0;
2857 kmp_int32 pass = 1;
2858 kmp_int32 k = start_k;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002859
2860 do {
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002861 //For now we're just linearly trying to find a thread
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002862 thread = team->t.t_threads[k];
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002863 k = (k+1) % nthreads;
2864
2865 // we did a full pass through all the threads
2866 if ( k == start_k ) pass = pass << 1;
2867
2868 } while ( !__kmp_give_task( thread, k, ptask, pass ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002869
2870 __kmp_second_top_half_finish_proxy(taskdata);
2871
2872 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2873}
2874
Jonathan Peyton283a2152016-03-02 22:47:51 +00002875//---------------------------------------------------------------------------------
2876// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2877//
2878// thread: allocating thread
2879// task_src: pointer to source task to be duplicated
2880// returns: a pointer to the allocated kmp_task_t structure (task).
2881kmp_task_t *
2882__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2883{
2884 kmp_task_t *task;
2885 kmp_taskdata_t *taskdata;
2886 kmp_taskdata_t *taskdata_src;
2887 kmp_taskdata_t *parent_task = thread->th.th_current_task;
2888 size_t shareds_offset;
2889 size_t task_size;
2890
2891 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2892 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2893 KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
2894 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2895 task_size = taskdata_src->td_size_alloc;
2896
2897 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
2898 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2899 #if USE_FAST_MEMORY
2900 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2901 #else
2902 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2903 #endif /* USE_FAST_MEMORY */
2904 KMP_MEMCPY(taskdata, taskdata_src, task_size);
2905
2906 task = KMP_TASKDATA_TO_TASK(taskdata);
2907
2908 // Initialize new task (only specific fields not affected by memcpy)
2909 if( task->shareds != NULL ) { // need to set up shareds pointer
2910 if( task->shareds != NULL ) { // need setup shareds pointer
2911 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2912 task->shareds = &((char*)taskdata)[shareds_offset];
2913 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2914 }
2915 taskdata->td_alloc_thread = thread;
2916 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2917
2918 // Only need to keep track of child task counts if team parallel and tasking not serialized
2919 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2920 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2921 if ( parent_task->td_taskgroup )
2922 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
2923 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
2924 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2925 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2926 }
2927
2928 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2929 thread, taskdata, taskdata->td_parent) );
2930#if OMPT_SUPPORT
2931 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2932#endif
2933 return task;
2934}
2935
2936// Routine optionally generated by the compiler for setting the lastprivate flag
2937// and calling needed constructors for private/firstprivate objects
2938// (used to form taskloop tasks from pattern task)
2939typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
2940
2941//---------------------------------------------------------------------------------
2942// __kmp_taskloop_linear: Start tasks of the taskloop linearly
2943//
2944// loc Source location information
2945// gtid Global thread ID
2946// task Task with whole loop iteration range
2947// lb Pointer to loop lower bound
2948// ub Pointer to loop upper bound
2949// st Loop stride
2950// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
2951// grainsize Schedule value if specified
2952// task_dup Tasks duplication routine
2953void
2954__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
2955 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
2956 int sched, kmp_uint64 grainsize, void *task_dup )
2957{
Jonathan Peyton5a299da2016-06-13 16:56:41 +00002958 KMP_COUNT_BLOCK(OMP_TASKLOOP);
2959 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
Jonathan Peyton283a2152016-03-02 22:47:51 +00002960 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
2961 kmp_uint64 tc;
2962 kmp_uint64 lower = *lb; // compiler provides global bounds here
2963 kmp_uint64 upper = *ub;
Samuel Antao11e4c532016-03-12 00:55:17 +00002964 kmp_uint64 i, num_tasks = 0, extras = 0;
Jonathan Peyton283a2152016-03-02 22:47:51 +00002965 kmp_info_t *thread = __kmp_threads[gtid];
2966 kmp_taskdata_t *current_task = thread->th.th_current_task;
2967 kmp_task_t *next_task;
2968 kmp_int32 lastpriv = 0;
2969 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
2970 size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
2971
2972 // compute trip count
2973 if ( st == 1 ) { // most common case
2974 tc = upper - lower + 1;
2975 } else if ( st < 0 ) {
2976 tc = (lower - upper) / (-st) + 1;
2977 } else { // st > 0
2978 tc = (upper - lower) / st + 1;
2979 }
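    // Worked example (illustrative only): lower = 0, upper = 9, st = 3
    // gives tc = (9 - 0) / 3 + 1 = 4 iterations (0, 3, 6, 9).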
2980 if(tc == 0) {
Jonathan Peytond4f39772016-06-21 19:18:13 +00002981 KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
Jonathan Peyton283a2152016-03-02 22:47:51 +00002982 // free the pattern task and exit
2983 __kmp_task_start( gtid, task, current_task );
2984 // do not execute anything for zero-trip loop
2985 __kmp_task_finish( gtid, task, current_task );
2986 return;
2987 }
2988
2989 // compute num_tasks/grainsize based on the input provided
2990 switch( sched ) {
2991 case 0: // no schedule clause specified, we can choose the default
2992 // let's try to schedule (team_size*10) tasks
2993 grainsize = thread->th.th_team_nproc * 10;
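        // no break: fall through to case 2 and treat the value above as the requested num_tasks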
2994 case 2: // num_tasks provided
2995 if( grainsize > tc ) {
2996 num_tasks = tc; // too big num_tasks requested, adjust values
2997 grainsize = 1;
2998 extras = 0;
2999 } else {
3000 num_tasks = grainsize;
3001 grainsize = tc / num_tasks;
3002 extras = tc % num_tasks;
3003 }
3004 break;
3005 case 1: // grainsize provided
3006 if( grainsize > tc ) {
3007 num_tasks = 1; // too big grainsize requested, adjust values
3008 grainsize = tc;
3009 extras = 0;
3010 } else {
3011 num_tasks = tc / grainsize;
3012 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3013 extras = tc % num_tasks;
3014 }
3015 break;
3016 default:
3017 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3018 }
3019 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3020 KMP_DEBUG_ASSERT(num_tasks > extras);
3021 KMP_DEBUG_ASSERT(num_tasks > 0);
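    // Worked example (illustrative only): tc = 10 with a grainsize clause of 3 (sched == 1)
    // yields num_tasks = 3, grainsize = 10 / 3 = 3, extras = 10 % 3 = 1, so the loop below
    // launches one task with 4 iterations and two tasks with 3 iterations each (4 + 3 + 3 = 10).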
Jonathan Peytond4f39772016-06-21 19:18:13 +00003022 KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
3023 gtid, num_tasks, grainsize, extras));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003024
3025 // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3026 for( i = 0; i < num_tasks; ++i ) {
3027 kmp_uint64 chunk_minus_1;
3028 if( extras == 0 ) {
3029 chunk_minus_1 = grainsize - 1;
3030 } else {
3031 chunk_minus_1 = grainsize;
3032             --extras; // the first 'extras' tasks get a bigger chunk (grainsize+1)
3033 }
3034 upper = lower + st * chunk_minus_1;
3035 if( i == num_tasks - 1 ) {
3036 // schedule the last task, set lastprivate flag
3037 lastpriv = 1;
3038#if KMP_DEBUG
3039 if( st == 1 )
3040 KMP_DEBUG_ASSERT(upper == *ub);
3041 else if( st > 0 )
3042 KMP_DEBUG_ASSERT(upper+st > *ub);
3043 else
3044 KMP_DEBUG_ASSERT(upper+st < *ub);
3045#endif
3046 }
3047 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3048 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3049 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3050 if( ptask_dup != NULL )
3051             ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
Jonathan Peytond4f39772016-06-21 19:18:13 +00003052 KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
3053 gtid, next_task, lower, upper, lower_offset, upper_offset));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003054 __kmp_omp_task(gtid, next_task, true); // schedule new task
3055 lower = upper + st; // adjust lower bound for the next iteration
3056 }
3057 // free the pattern task and exit
3058 __kmp_task_start( gtid, task, current_task );
3059 // do not execute the pattern task, just do bookkeeping
3060 __kmp_task_finish( gtid, task, current_task );
3061}
3062
3063/*!
3064@ingroup TASKING
3065@param loc Source location information
3066@param gtid Global thread ID
3067@param task Task structure
3068@param if_val Value of the if clause
3069@param lb Pointer to loop lower bound
3070@param ub Pointer to loop upper bound
3071@param st Loop stride
3072@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3073@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3074@param grainsize Schedule value if specified
3075@param task_dup Tasks duplication routine
3076
3077Execute the taskloop construct.
3078*/
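// A purely illustrative example of the documented encoding above (not tied to any particular
// compiler's lowering): a `#pragma omp taskloop grainsize(4)` over i = 0..99 with step 1 would
// reach this entry with *lb == 0, *ub == 99, st == 1, nogroup == 0, sched == 1, grainsize == 4,
// and currently always dispatches to __kmp_taskloop_linear() below.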
3079void
3080__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3081 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3082 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3083{
3084 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3085 KMP_DEBUG_ASSERT( task != NULL );
3086
3087 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3088 gtid, taskdata, *lb, *ub, st, grainsize, sched));
3089
3090 // check if clause value first
3091 if( if_val == 0 ) { // if(0) specified, mark task as serial
3092 taskdata->td_flags.task_serial = 1;
3093 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3094 }
3095 if( nogroup == 0 ) {
3096 __kmpc_taskgroup( loc, gtid );
3097 }
3098
3099 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3100 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3101 }
3102
3103 if( nogroup == 0 ) {
3104 __kmpc_end_taskgroup( loc, gtid );
3105 }
3106 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3107}
3108
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003109#endif