/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_info_t *thread = __kmp_threads[ gtid ]; // calling thread; needed by __kmp_thread_free below
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block  -> sb_prev  = stack_block;
            new_block  -> sb_next  = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}
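
// Layout note for the tied-task stack used above: entries live in fixed-size
// blocks (kmp_stack_block_t, TASK_STACK_BLOCK_SIZE slots each) chained through
// sb_next/sb_prev, with ts_top pointing at the next free slot.  Whenever
// ts_entries crosses a block boundary, push and pop step into the adjacent
// block instead of reallocating, so the stack grows one block at a time.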

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
//  __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
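
// Summary of the deque protocol above: each thread owns one deque in the task
// team and only the owner pushes into it (at td_deque_tail), so the deque can be
// allocated and the first full-check done without the lock; the push itself runs
// under td_deque_lock, and with OMP_41_ENABLED the size is re-checked there
// because proxy tasks may arrive from threads outside of OpenMP.  Indices wrap
// with TASK_DEQUE_MASK and the element count is published via TCW_4/TCR_4.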


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
//
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
//
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // the thread's current task becomes the parent of the implicit tasks just created for the new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
//
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );
    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if tasking serialized
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
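
// Reference-counting note for the walk above: td_allocated_child_tasks starts at
// one (the task's reference on itself, see __kmp_task_alloc below) and, for
// explicit parents, gains one per child task allocated (implicit tasks are never
// deallocated, so theirs is not tracked).  A task is therefore freed here only
// once its own count drops to zero, and freeing it drops one reference on its
// parent, which may let already-completed ancestors be freed in turn.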

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
//
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to check the counter so that the task structure is not freed prematurely
        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
        KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
                       gtid, counter, taskdata ) );
        if ( counter > 0 ) {
            // untied task is not done, to be continued possibly by other thread, do not free it now
            if (resumed_task == NULL) {
                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
            }
            thread->th.th_current_task = resumed_task;  // restore current_task
            resumed_task->td_flags.executing = 1;  // resume previous task
            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
                          gtid, taskdata, resumed_task) );
            return;
        }
    }

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag will be reset to 1 again by this same function.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the task's destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released,
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}
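
// Ordering note for __kmp_task_finish: the task is marked complete and the
// parent's td_incomplete_child_tasks / taskgroup counters are decremented (and
// dependent tasks released) while td_flags.executing is still 1, for the reason
// given in the comment above; only then is executing cleared, the destructor
// thunk run, th_current_task switched to the resumed task, and the storage
// handed to __kmp_free_task_and_ancestors.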

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
//
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task. This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref:  reference to source location of parallel region
// this_thr:  thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//  task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy    = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started     = 1;
    task->td_flags.executing   = 1;
    task->td_flags.complete    = 0;
    task->td_flags.freed       = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;  // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks  == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a multiple of val, where val is a power of two.
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val
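
// For example, with val == sizeof(void *) == 8: __kmp_round_up_to_val(88, 8)
// returns 88 (already a multiple of 8), while __kmp_round_up_to_val(90, 8)
// clears the low bits to 88 and then adds 8, returning 96.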


//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
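
    // Resulting single-allocation layout (set up by the code just below):
    //
    //   [ kmp_taskdata_t ][ kmp_task_t + privates ][ pad ][ shareds ]
    //   ^taskdata          ^task                          ^task->shareds
    //                                                      = (char *)taskdata + shareds_offset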

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;  // AC: Always start with 0 part id

    taskdata->td_task_id      = KMP_GEN_TASK_ID();
    taskdata->td_team         = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent       = parent_task;
    taskdata->td_level        = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_untied_count = 0;
    taskdata->td_ident        = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
    taskdata->td_task_team         = thread->th.th_task_team;
    taskdata->td_size_alloc        = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
        || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started     = 0;
    taskdata->td_flags.executing   = 0;
    taskdata->td_flags.complete    = 0;
    taskdata->td_flags.freed       = 0;

    taskdata->td_flags.native      = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}


kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}
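
// Illustrative only (not part of this file): a compiler lowering "#pragma omp task"
// typically drives the allocation entry point above roughly as sketched here, where
// task_entry_outlined and the size values are hypothetical compiler-generated items:
//
//   kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /* flags: bit 0 = tied */ 1,
//                                          sizeof(kmp_task_t) /* + privates */,
//                                          sizeof_shareds, &task_entry_outlined );
//   /* ... copy firstprivates / shared pointers into t ... */
//   __kmpc_omp_task( &loc, gtid, t );   // defined later in this file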
1118
1119//-----------------------------------------------------------
1120// __kmp_invoke_task: invoke the specified task
1121//
1122// gtid: global thread ID of caller
1123// task: the task to invoke
1124// current_task: the task to resume after task invokation
1125
1126static void
1127__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1128{
1129 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001130 kmp_uint64 cur_time;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001131#if OMP_40_ENABLED
1132 int discard = 0 /* false */;
1133#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001134 KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1135 gtid, taskdata, current_task) );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00001136 KMP_DEBUG_ASSERT(task);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001137#if OMP_41_ENABLED
1138 if ( taskdata->td_flags.proxy == TASK_PROXY &&
1139 taskdata->td_flags.complete == 1)
1140 {
1141 // This is a proxy task that was already completed but it needs to run
1142 // its bottom-half finish
1143 KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1144 gtid, taskdata) );
1145
1146 __kmp_bottom_half_finish_proxy(gtid,task);
1147
1148 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1149
1150 return;
1151 }
1152#endif
1153
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001154#if USE_ITT_BUILD && USE_ITT_NOTIFY
1155 if(__kmp_forkjoin_frames_mode == 3) {
1156 // Get the current time stamp to measure task execution time to correct barrier imbalance time
1157 cur_time = __itt_get_timestamp();
1158 }
1159#endif
1160
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001161#if OMP_41_ENABLED
1162 // Proxy tasks are not handled by the runtime
1163 if ( taskdata->td_flags.proxy != TASK_PROXY )
1164#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001165 __kmp_task_start( gtid, task, current_task );
1166
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001167#if OMPT_SUPPORT
1168 ompt_thread_info_t oldInfo;
1169 kmp_info_t * thread;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001170 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001171 // Store the threads states and restore them after the task
1172 thread = __kmp_threads[ gtid ];
1173 oldInfo = thread->th.ompt_thread_info;
1174 thread->th.ompt_thread_info.wait_id = 0;
1175 thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1176 taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1177 }
1178#endif
1179
Jim Cownie181b4bb2013-12-23 17:28:57 +00001180#if OMP_40_ENABLED
1181 // TODO: cancel tasks if the parallel region has also been cancelled
1182 // TODO: check if this sequence can be hoisted above __kmp_task_start
1183 // if cancellation has been enabled for this run ...
1184 if (__kmp_omp_cancellation) {
1185 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1186 kmp_team_t * this_team = this_thr->th.th_team;
1187 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1188 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
Jonathan Peyton45be4502015-08-11 21:36:41 +00001189 KMP_COUNT_BLOCK(TASK_cancelled);
Jim Cownie181b4bb2013-12-23 17:28:57 +00001190 // this task belongs to a task group and we need to cancel it
1191 discard = 1 /* true */;
1192 }
1193 }
1194
Jim Cownie5e8470a2013-09-27 10:38:44 +00001195 //
1196 // Invoke the task routine and pass in relevant data.
1197 // Thunks generated by gcc take a different argument list.
1198 //
Jim Cownie181b4bb2013-12-23 17:28:57 +00001199 if (!discard) {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001200#if KMP_STATS_ENABLED
Jonathan Peyton45be4502015-08-11 21:36:41 +00001201 KMP_COUNT_BLOCK(TASK_executed);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001202 switch(KMP_GET_THREAD_STATE()) {
1203 case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
1204 case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
1205 case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
1206 case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
1207 case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
1208 default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
1209 }
1210#endif // KMP_STATS_ENABLED
Jim Cownie181b4bb2013-12-23 17:28:57 +00001211#endif // OMP_40_ENABLED
Jonathan Peytonadee8c52015-11-11 17:49:50 +00001212
1213#if OMPT_SUPPORT && OMPT_TRACE
1214 /* let OMPT know that we're about to run this task */
1215 if (ompt_enabled &&
1216 ompt_callbacks.ompt_callback(ompt_event_task_switch))
1217 {
1218 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1219 current_task->ompt_task_info.task_id,
1220 taskdata->ompt_task_info.task_id);
1221 }
1222#endif
1223
Jim Cownie5e8470a2013-09-27 10:38:44 +00001224#ifdef KMP_GOMP_COMPAT
Jim Cownie181b4bb2013-12-23 17:28:57 +00001225 if (taskdata->td_flags.native) {
1226 ((void (*)(void *))(*(task->routine)))(task->shareds);
1227 }
1228 else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001229#endif /* KMP_GOMP_COMPAT */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001230 {
1231 (*(task->routine))(gtid, task);
1232 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001233 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonadee8c52015-11-11 17:49:50 +00001234
1235#if OMPT_SUPPORT && OMPT_TRACE
1236 /* let OMPT know that we're returning to the callee task */
1237 if (ompt_enabled &&
1238 ompt_callbacks.ompt_callback(ompt_event_task_switch))
1239 {
1240 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1241 taskdata->ompt_task_info.task_id,
1242 current_task->ompt_task_info.task_id);
1243 }
1244#endif
1245
Jim Cownie181b4bb2013-12-23 17:28:57 +00001246#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001247 }
Jim Cownie181b4bb2013-12-23 17:28:57 +00001248#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001249
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001250
1251#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001252 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001253 thread->th.ompt_thread_info = oldInfo;
1254 taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1255 }
1256#endif
1257
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001258#if OMP_41_ENABLED
1259 // Proxy tasks are not handled by the runtime
1260 if ( taskdata->td_flags.proxy != TASK_PROXY )
1261#endif
1262 __kmp_task_finish( gtid, task, current_task );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001263
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001264#if USE_ITT_BUILD && USE_ITT_NOTIFY
1265 // Barrier imbalance - correct arrive time after the task finished
1266 if(__kmp_forkjoin_frames_mode == 3) {
1267 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1268 if(this_thr->th.th_bar_arrive_time) {
1269 this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1270 }
1271 }
1272#endif
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001273 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001274 gtid, taskdata, current_task) );
1275 return;
1276}
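// For reference, the two entry-point conventions dispatched above, as a
// minimal sketch (the names kmp_style_entry/gomp_style_entry are illustrative
// only; the first matches kmp_routine_entry_t, the second is what a
// GOMP-generated thunk, marked by td_flags.native, looks like):
//
//   kmp_int32 kmp_style_entry ( kmp_int32 gtid, void *task ); // invoked as (*task->routine)(gtid, task)
//   void      gomp_style_entry( void *data );                 // invoked as ((void (*)(void *))routine)(task->shareds)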
1277
1278//-----------------------------------------------------------------------
1279// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1280//
1281// loc_ref: location of original task pragma (ignored)
1282// gtid: Global Thread ID of encountering thread
 1283 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the "new task"
1284// Returns:
 1285 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
 1286 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1287
1288kmp_int32
1289__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1290{
1291 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1292
1293 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1294 gtid, loc_ref, new_taskdata ) );
1295
1296 /* Should we execute the new task or queue it? For now, let's just always try to
1297 queue it. If the queue fills up, then we'll execute it. */
1298
1299 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1300 { // Execute this task immediately
1301 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1302 new_taskdata->td_flags.task_serial = 1;
1303 __kmp_invoke_task( gtid, new_task, current_task );
1304 }
1305
1306 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1307 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1308 new_taskdata ) );
1309
1310 return TASK_CURRENT_NOT_QUEUED;
1311}
1312
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001313//---------------------------------------------------------------------
1314// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1315// gtid: Global Thread ID of encountering thread
1316// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
 1317 // serialize_immediate: if TRUE and the task is executed immediately, its execution is serialized
1318// returns:
1319//
 1320 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
 1321 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1322kmp_int32
1323__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1324{
1325 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1326
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001327#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001328 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001329 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1330 __builtin_frame_address(0);
1331 }
1332#endif
1333
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001334 /* Should we execute the new task or queue it? For now, let's just always try to
1335 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001336#if OMP_41_ENABLED
1337 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1338#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001339 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001340#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001341 { // Execute this task immediately
1342 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1343 if ( serialize_immediate )
1344 new_taskdata -> td_flags.task_serial = 1;
1345 __kmp_invoke_task( gtid, new_task, current_task );
1346 }
1347
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001348#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001349 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001350 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1351 }
1352#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001353
1354 return TASK_CURRENT_NOT_QUEUED;
1355}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001356
1357//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001358// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1359// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001360// loc_ref: location of original task pragma (ignored)
1361// gtid: Global Thread ID of encountering thread
1362// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1363// returns:
1364//
 1365 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
 1366 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1367
1368kmp_int32
1369__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1370{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001371 kmp_int32 res;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001372 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001373
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001374#if KMP_DEBUG
1375 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1376#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001377 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1378 gtid, loc_ref, new_taskdata ) );
1379
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001380 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001381
1382 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1383 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001384 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001385}
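// Illustrative sketch (not compiled here): roughly how a compiler lowers
// "#pragma omp task" onto the entry points above, assuming the
// __kmpc_omp_task_alloc() prototype from kmp.h; the names, sizes and the
// flags value (1 == tied) below are hypothetical and chosen for illustration.
//
//   kmp_int32 example_task_entry( kmp_int32 gtid, void *task ) {
//       // body of the task region; captured data lives in
//       // ((kmp_task_t *)task)->shareds
//       return 0;
//   }
//
//   void example_lowering( ident_t *loc, kmp_int32 gtid ) {
//       kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, 1 /* tied */,
//                                              sizeof(kmp_task_t), 0 /* no shareds */,
//                                              example_task_entry );
//       __kmpc_omp_task( loc, gtid, t ); // queued, or run inline if the deque cannot take it
//   }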
1386
Jim Cownie5e8470a2013-09-27 10:38:44 +00001387//-------------------------------------------------------------------------------------
1388// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1389
1390kmp_int32
1391__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1392{
1393 kmp_taskdata_t * taskdata;
1394 kmp_info_t * thread;
1395 int thread_finished = FALSE;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001396 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001397
Jonathan Peyton54127982015-11-04 21:37:48 +00001398 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001399
1400 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1401 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1402
1403 thread = __kmp_threads[ gtid ];
1404 taskdata = thread -> th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001405
1406#if OMPT_SUPPORT && OMPT_TRACE
1407 ompt_task_id_t my_task_id;
1408 ompt_parallel_id_t my_parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001409
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001410 if (ompt_enabled) {
1411 kmp_team_t *team = thread->th.th_team;
1412 my_task_id = taskdata->ompt_task_info.task_id;
1413 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001414
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001415 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001416 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1417 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1418 my_parallel_id, my_task_id);
1419 }
1420 }
1421#endif
1422
Jim Cownie5e8470a2013-09-27 10:38:44 +00001423#if USE_ITT_BUILD
1424 // Note: These values are used by ITT events as well.
1425#endif /* USE_ITT_BUILD */
1426 taskdata->td_taskwait_counter += 1;
1427 taskdata->td_taskwait_ident = loc_ref;
1428 taskdata->td_taskwait_thread = gtid + 1;
1429
1430#if USE_ITT_BUILD
1431 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1432 if ( itt_sync_obj != NULL )
1433 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1434#endif /* USE_ITT_BUILD */
1435
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001436#if OMP_41_ENABLED
Jonathan Peyton61118492016-05-20 19:03:38 +00001437 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001438#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001439 if ( ! taskdata->td_flags.team_serial )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001440#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001441 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001442 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001443 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001444 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001445 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1446 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001447 }
1448 }
1449#if USE_ITT_BUILD
1450 if ( itt_sync_obj != NULL )
1451 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1452#endif /* USE_ITT_BUILD */
1453
1454 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1455 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001456
1457#if OMPT_SUPPORT && OMPT_TRACE
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001458 if (ompt_enabled) {
1459 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1460 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001461 my_parallel_id, my_task_id);
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001462 }
1463 taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001464 }
1465#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001466 }
1467
1468 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1469 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1470
1471 return TASK_CURRENT_NOT_QUEUED;
1472}
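// For reference, a minimal source-level sketch of what reaches this entry
// point (child_work() is a hypothetical function):
//
//   #pragma omp task
//   child_work();
//   #pragma omp taskwait  // becomes __kmpc_omp_taskwait(); the waiting thread
//                         // keeps executing other tasks until the current
//                         // task's td_incomplete_child_tasks drops to zero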
1473
1474
1475//-------------------------------------------------
1476// __kmpc_omp_taskyield: switch to a different task
1477
1478kmp_int32
1479__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1480{
1481 kmp_taskdata_t * taskdata;
1482 kmp_info_t * thread;
1483 int thread_finished = FALSE;
1484
Jonathan Peyton45be4502015-08-11 21:36:41 +00001485 KMP_COUNT_BLOCK(OMP_TASKYIELD);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001486 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001487
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1489 gtid, loc_ref, end_part) );
1490
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001491 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001492 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1493
1494 thread = __kmp_threads[ gtid ];
1495 taskdata = thread -> th.th_current_task;
1496 // Should we model this as a task wait or not?
1497#if USE_ITT_BUILD
1498 // Note: These values are used by ITT events as well.
1499#endif /* USE_ITT_BUILD */
1500 taskdata->td_taskwait_counter += 1;
1501 taskdata->td_taskwait_ident = loc_ref;
1502 taskdata->td_taskwait_thread = gtid + 1;
1503
1504#if USE_ITT_BUILD
1505 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1506 if ( itt_sync_obj != NULL )
1507 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1508#endif /* USE_ITT_BUILD */
1509 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001510 kmp_task_team_t * task_team = thread->th.th_task_team;
1511 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001512 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001513 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1514 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1515 }
1516 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001517 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001518#if USE_ITT_BUILD
1519 if ( itt_sync_obj != NULL )
1520 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1521#endif /* USE_ITT_BUILD */
1522
1523 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1524 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1525 }
1526
1527 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1528 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1529
1530 return TASK_CURRENT_NOT_QUEUED;
1531}
1532
1533
1534#if OMP_40_ENABLED
1535//-------------------------------------------------------------------------------------
1536// __kmpc_taskgroup: Start a new taskgroup
1537
1538void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001539__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001540{
1541 kmp_info_t * thread = __kmp_threads[ gtid ];
1542 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1543 kmp_taskgroup_t * tg_new =
1544 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1545 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1546 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001547 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001548 tg_new->parent = taskdata->td_taskgroup;
1549 taskdata->td_taskgroup = tg_new;
1550}
1551
1552
1553//-------------------------------------------------------------------------------------
1554// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1555// and its descendants are complete
1556
1557void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001558__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001559{
1560 kmp_info_t * thread = __kmp_threads[ gtid ];
1561 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1562 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1563 int thread_finished = FALSE;
1564
1565 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1566 KMP_DEBUG_ASSERT( taskgroup != NULL );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001567 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001568
1569 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1570#if USE_ITT_BUILD
1571 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1572 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1573 if ( itt_sync_obj != NULL )
1574 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1575#endif /* USE_ITT_BUILD */
1576
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001577#if OMP_41_ENABLED
Jonathan Peyton61118492016-05-20 19:03:38 +00001578 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001579#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001580 if ( ! taskdata->td_flags.team_serial )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001581#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001582 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001583 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001584 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001585 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1586 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587 }
1588 }
1589
1590#if USE_ITT_BUILD
1591 if ( itt_sync_obj != NULL )
1592 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1593#endif /* USE_ITT_BUILD */
1594 }
1595 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1596
1597 // Restore parent taskgroup for the current task
1598 taskdata->td_taskgroup = taskgroup->parent;
1599 __kmp_thread_free( thread, taskgroup );
1600
1601 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1602}
1603#endif
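// Illustrative sketch of the taskgroup nesting handled above (produce() and
// consume() are hypothetical): each __kmpc_taskgroup pushes a kmp_taskgroup_t
// whose 'parent' link points at the enclosing group, and each
// __kmpc_end_taskgroup waits for its group's count to reach zero before
// popping it.
//
//   #pragma omp taskgroup         // push outer group
//   {
//       #pragma omp task
//       produce();                // registers with the outer group
//       #pragma omp taskgroup     // push inner group; inner->parent == outer
//       {
//           #pragma omp task
//           consume();            // registers with the inner group
//       }                         // wait on inner->count, then pop
//   }                             // wait on outer->count, then pop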
1604
1605
1606//------------------------------------------------------
1607// __kmp_remove_my_task: remove a task from my own deque
1608
1609static kmp_task_t *
1610__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1611 kmp_int32 is_constrained )
1612{
1613 kmp_task_t * task;
1614 kmp_taskdata_t * taskdata;
1615 kmp_thread_data_t *thread_data;
1616 kmp_uint32 tail;
1617
1618 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1619 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1620
1621 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1622
1623 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1624 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1625 thread_data->td.td_deque_tail) );
1626
1627 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1628 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1629 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1630 thread_data->td.td_deque_tail) );
1631 return NULL;
1632 }
1633
1634 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1635
1636 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1637 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1638 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1639 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1640 thread_data->td.td_deque_tail) );
1641 return NULL;
1642 }
1643
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001644 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645 taskdata = thread_data -> td.td_deque[ tail ];
1646
1647 if (is_constrained) {
 1648 // we need to check if the candidate obeys the task scheduling constraint:
 1649 // only a descendant of the current task can be scheduled
1650 kmp_taskdata_t * current = thread->th.th_current_task;
1651 kmp_int32 level = current->td_level;
1652 kmp_taskdata_t * parent = taskdata->td_parent;
1653 while ( parent != current && parent->td_level > level ) {
1654 parent = parent->td_parent; // check generation up to the level of the current task
1655 KMP_DEBUG_ASSERT(parent != NULL);
1656 }
1657 if ( parent != current ) {
Jonathan Peytonb6f0f522016-06-09 18:51:17 +00001658 // If the tail task is not a descendant of the current task, then no other descendant can appear in the deque.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001659 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1660 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1661 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1662 thread_data->td.td_deque_tail) );
1663 return NULL;
1664 }
1665 }
1666
1667 thread_data -> td.td_deque_tail = tail;
1668 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1669
1670 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1671
1672 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1673 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1674 thread_data->td.td_deque_tail) );
1675
1676 task = KMP_TASKDATA_TO_TASK( taskdata );
1677 return task;
1678}
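// The deque indices above wrap with a power-of-two mask
// (TASK_DEQUE_MASK(td) == td_deque_size - 1); a minimal sketch of the index
// arithmetic shared by the pop and steal paths:
//
//   tail = ( tail - 1 ) & mask;  // pop the newest entry from my own tail (LIFO)
//   head = ( head + 1 ) & mask;  // steal the oldest entry from a victim's head (FIFO)
//
// e.g. with a deque size of 256 the mask is 0xFF, so a tail of 0 wraps to 255.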
1679
1680
1681//-----------------------------------------------------------
1682// __kmp_steal_task: remove a task from another thread's deque
1683// Assume that calling thread has already checked existence of
1684// task_team thread_data before calling this routine.
1685
1686static kmp_task_t *
1687__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1688 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1689 kmp_int32 is_constrained )
1690{
1691 kmp_task_t * task;
1692 kmp_taskdata_t * taskdata;
1693 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001694 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001695
1696 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1697
1698 threads_data = task_team -> tt.tt_threads_data;
1699 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1700
1701 victim_tid = victim->th.th_info.ds.ds_tid;
1702 victim_td = & threads_data[ victim_tid ];
1703
1704 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1705 "head=%u tail=%u\n",
1706 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1707 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1708
1709 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1710 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1711 {
1712 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1713 "ntasks=%d head=%u tail=%u\n",
1714 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1715 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1716 return NULL;
1717 }
1718
1719 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1720
1721 // Check again after we acquire the lock
1722 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1723 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1724 {
1725 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1726 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1727 "ntasks=%d head=%u tail=%u\n",
1728 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1729 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1730 return NULL;
1731 }
1732
1733 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1734
1735 if ( !is_constrained ) {
1736 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001737 KMP_ASSERT(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001738 // Bump head pointer and Wrap.
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001739 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK(victim_td->td);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001740 } else {
1741 // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001742 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(victim_td->td); // Wrap index.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001743 taskdata = victim_td -> td.td_deque[ tail ];
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001744 KMP_ASSERT(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745 // we need to check if the candidate obeys the task scheduling constraint:
 1746 // only a descendant of the current task can be scheduled
1747 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1748 kmp_int32 level = current->td_level;
1749 kmp_taskdata_t * parent = taskdata->td_parent;
1750 while ( parent != current && parent->td_level > level ) {
1751 parent = parent->td_parent; // check generation up to the level of the current task
1752 KMP_DEBUG_ASSERT(parent != NULL);
1753 }
1754 if ( parent != current ) {
 1755 // If the tail task is not a descendant of the current task, then no other descendant can appear in the deque (?).
1756 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1757 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1758 "ntasks=%d head=%u tail=%u\n",
1759 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1760 task_team, victim_td->td.td_deque_ntasks,
1761 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1762 return NULL;
1763 }
1764 victim_td -> td.td_deque_tail = tail;
1765 }
1766 if (*thread_finished) {
 1767 // We need to un-mark this thread as finished (it had counted itself out).
 1768 // This must be done before releasing the lock, or else other threads
 1769 // (starting with the master) might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001770 kmp_uint32 count;
1771
1772 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001773
1774 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1775 gtid, count + 1, task_team) );
1776
1777 *thread_finished = FALSE;
1778 }
1779 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1780
1781 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1782
Jonathan Peyton45be4502015-08-11 21:36:41 +00001783 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001784 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001785 "ntasks=%d head=%u tail=%u\n",
1786 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1787 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1788 victim_td->td.td_deque_tail) );
1789
1790 task = KMP_TASKDATA_TO_TASK( taskdata );
1791 return task;
1792}
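// Illustrative sketch of the constraint check shared by the pop and steal
// paths above: a candidate is accepted only if walking its td_parent chain
// (while the ancestor's td_level stays above the current task's level) reaches
// the current task, i.e. only descendants of the current task qualify. With
// the hypothetical tree below, A, B and G are accepted while S is rejected:
//
//          root
//         /    \
//     current   S        <- S's chain stops at root without hitting current
//      /   \
//     A     B            <- direct children: td_parent == current
//     |
//     G                  <- grandchild: chain is G -> A -> current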
1793
1794
1795//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001796// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001797// is statisfied (return true) or there are none left (return false).
1798// final_spin is TRUE if this is the spin at the release barrier.
1799// thread_finished indicates whether the thread is finished executing all
1800// the tasks it has on its deque, and is at the release barrier.
1801// spinner is the location on which to spin.
1802// spinner == NULL means only execute a single task and return.
1803// checker is the value to check to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001804template <class C>
Jonathan Peyton61118492016-05-20 19:03:38 +00001805static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001806 int *thread_finished
1807 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808{
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001809 kmp_task_team_t * task_team = thread->th.th_task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810 kmp_thread_data_t * threads_data;
1811 kmp_task_t * task;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001812 kmp_info_t * other_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1814 volatile kmp_uint32 * unfinished_threads;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001815 kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001816
1817 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1818 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1819
Jonathan Peyton54127982015-11-04 21:37:48 +00001820 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001821
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001822 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001823 gtid, final_spin, *thread_finished) );
1824
1825 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1826 KMP_DEBUG_ASSERT( threads_data != NULL );
1827
1828 nthreads = task_team -> tt.tt_nproc;
1829 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001830#if OMP_41_ENABLED
1831 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1832#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001833 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001834#endif
Paul Osmialowski9cc353e2016-06-01 09:59:26 +00001835 KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001836
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001837 while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
1838 while (1) { // Inner loop to find a task and execute it
1839 task = NULL;
1840 if (use_own_tasks) { // check on own queue first
1841 task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001842 }
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001843 if ((task == NULL) && (nthreads > 1)) { // Steal a task
1844 int asleep = 1;
1845 use_own_tasks = 0;
1846 // Try to steal from the last place I stole from successfully.
1847 if (victim == -2) { // haven't stolen anything yet
1848 victim = threads_data[tid].td.td_deque_last_stolen;
1849 if (victim != -1) // if we have a last stolen from victim, get the thread
1850 other_thread = threads_data[victim].td.td_thr;
1851 }
1852 if (victim != -1) { // found last victim
1853 asleep = 0;
1854 }
1855 else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
1856 do { // Find a different thread to steal work from.
1857 // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
1858 // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
1859 victim = __kmp_get_random(thread) % (nthreads - 1);
1860 if (victim >= tid) {
1861 ++victim; // Adjusts random distribution to exclude self
1862 }
1863 // Found a potential victim
1864 other_thread = threads_data[victim].td.td_thr;
1865 // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
1866 // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
1867 // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
1868 // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
1869 // do not sleep at the separate tasking barrier, so this isn't a problem.
1870 asleep = 0;
1871 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1872 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1873 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
1874 asleep = 1;
1875 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
 1876 // A sleeping thread should not have any tasks on its queue. There is a slight
1877 // possibility that it resumes, steals a task from another thread, which spawns more
1878 // tasks, all in the time that it takes this thread to check => don't write an assertion
1879 // that the victim's queue is empty. Try stealing from a different thread.
1880 }
1881 } while (asleep);
1882 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001883
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001884 if (!asleep) {
1885 // We have a victim to try to steal from
1886 task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
1887 }
1888 if (task != NULL) { // set last stolen to victim
1889 if (threads_data[tid].td.td_deque_last_stolen != victim) {
1890 threads_data[tid].td.td_deque_last_stolen = victim;
 1891 // The pre-refactored code did not try more than 1 successful new victim,
1892 // unless the last one generated more local tasks; new_victim keeps track of this
1893 new_victim = 1;
1894 }
1895 }
1896 else { // No tasks found; unset last_stolen
1897 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
1898 victim = -2; // no successful victim found
1899 }
1900 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001902 if (task == NULL) // break out of tasking loop
1903 break;
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001904
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001905 // Found a task; execute it
Jim Cownie5e8470a2013-09-27 10:38:44 +00001906#if USE_ITT_BUILD && USE_ITT_NOTIFY
1907 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001908 if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
1909 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001910 }
1911 __kmp_itt_task_starting( itt_sync_obj );
1912 }
1913#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1914 __kmp_invoke_task( gtid, task, current_task );
1915#if USE_ITT_BUILD
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001916 if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001917#endif /* USE_ITT_BUILD */
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001918 // If this thread is only partway through the barrier and the condition is met, then return now,
1919 // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
1920 // in the barrier, waiting to be released, we know that the termination condition will not be
 1921 // satisfied, so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001922 if (flag == NULL || (!final_spin && flag->done_check())) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001923 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001924 return TRUE;
1925 }
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001926 if (thread->th.th_task_team == NULL) {
1927 break;
1928 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001930 // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
1931 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
1932 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
1933 use_own_tasks = 1;
1934 new_victim = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001935 }
1936 }
1937
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001938 // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001939#if OMP_41_ENABLED
1940 // The work queue may be empty but there might be proxy tasks still executing
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001941 if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001942#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001943 if (final_spin)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001944#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001945 {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001946 // First, decrement the #unfinished threads, if that has not already been done. This decrement
1947 // might be to the spin location, and result in the termination condition being satisfied.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001948 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001949 kmp_uint32 count;
1950
1951 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001952 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001953 gtid, count, task_team) );
1954 *thread_finished = TRUE;
1955 }
1956
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001957 // It is now unsafe to reference thread->th.th_team !!!
1958 // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
1959 // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001961 if (flag != NULL && flag->done_check()) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001962 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001963 return TRUE;
1964 }
1965 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001966
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00001967 // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
1968 if (thread->th.th_task_team == NULL) {
1969 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
1970 return FALSE;
1971 }
1972
1973#if OMP_41_ENABLED
1974 // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
1975 // tasks from own queue
1976 if (nthreads == 1)
1977 use_own_tasks = 1;
1978 else
1979#endif
1980 {
1981 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
1982 return FALSE;
1983 }
1984 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985}
1986
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001987int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1988 int *thread_finished
1989 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1990{
1991 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1992 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1993}
1994
1995int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1996 int *thread_finished
1997 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1998{
1999 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2000 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2001}
2002
2003int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2004 int *thread_finished
2005 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2006{
2007 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2008 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2009}
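// The three wrappers above only instantiate the template for the different
// flag widths; conceptually the flag object supplies the spin location and a
// done_check() predicate. A hypothetical, simplified stand-in for kmp_flag_32
// (the real classes live in kmp_wait_release.h):
//
//   class example_flag32 {
//       volatile kmp_uint32 *loc;     // e.g. &taskdata->td_incomplete_child_tasks
//       kmp_uint32           checker; // e.g. 0U: done once the counter reaches zero
//   public:
//       example_flag32( volatile kmp_uint32 *l, kmp_uint32 c ) : loc(l), checker(c) {}
//       bool done_check() { return TCR_4(*loc) == checker; }
//   };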
2010
2011
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012
2013//-----------------------------------------------------------------------------
2014// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2015// next barrier so they can assist in executing enqueued tasks.
2016// First thread in allocates the task team atomically.
2017
2018static void
2019__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2020{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021 kmp_thread_data_t *threads_data;
2022 int nthreads, i, is_init_thread;
2023
2024 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2025 __kmp_gtid_from_thread( this_thr ) ) );
2026
2027 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002028 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029
2030 nthreads = task_team->tt.tt_nproc;
2031 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002032 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002033
2034 // Allocate or increase the size of threads_data if necessary
2035 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2036
2037 if (!is_init_thread) {
2038 // Some other thread already set up the array.
2039 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2040 __kmp_gtid_from_thread( this_thr ) ) );
2041 return;
2042 }
2043 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2044 KMP_DEBUG_ASSERT( threads_data != NULL );
2045
2046 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2047 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2048 {
2049 // Release any threads sleeping at the barrier, so that they can steal
2050 // tasks and execute them. In extra barrier mode, tasks do not sleep
2051 // at the separate tasking barrier, so this isn't a problem.
2052 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002053 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002054 kmp_info_t *thread = threads_data[i].td.td_thr;
2055
2056 if (i == this_thr->th.th_info.ds.ds_tid) {
2057 continue;
2058 }
2059 // Since we haven't locked the thread's suspend mutex lock at this
2060 // point, there is a small window where a thread might be putting
2061 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002062 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002063 // to see if other threads are sleeping (using the same random
2064 // mechanism that is used for task stealing) and awakens them if
2065 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002066 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002067 {
2068 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2069 __kmp_gtid_from_thread( this_thr ),
2070 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002071 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002072 }
2073 else {
2074 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2075 __kmp_gtid_from_thread( this_thr ),
2076 __kmp_gtid_from_thread( thread ) ) );
2077 }
2078 }
2079 }
2080
2081 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2082 __kmp_gtid_from_thread( this_thr ) ) );
2083}
2084
2085
2086/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002087/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002088 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2089 * like a shadow of the kmp_team_t data struct, with a different lifetime.
 2090 * After a child thread checks into a barrier and calls __kmp_release() from
2091 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2092 * longer assume that the kmp_team_t structure is intact (at any moment, the
2093 * master thread may exit the barrier code and free the team data structure,
2094 * and return the threads to the thread pool).
2095 *
 2096 * This does not work with the tasking code, as the thread is still
 2097 * expected to participate in the execution of any tasks that may have been
 2098 * spawned by a member of the team, and the thread still needs access to
 2099 * each thread in the team, so that it can steal work from it.
2100 *
2101 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 2102 * counting mechanism, and is allocated by the master thread before calling
 2103 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2104 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2105 * of the kmp_task_team_t structs for consecutive barriers can overlap
2106 * (and will, unless the master thread is the last thread to exit the barrier
2107 * release phase, which is not typical).
2108 *
2109 * The existence of such a struct is useful outside the context of tasking,
2110 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2111 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2112 * libraries.
2113 *
2114 * We currently use the existence of the threads array as an indicator that
2115 * tasks were spawned since the last barrier. If the structure is to be
2116 * useful outside the context of tasking, then this will have to change, but
2117 * not settting the field minimizes the performance impact of tasking on
2118 * barriers, when no explicit tasks were spawned (pushed, actually).
2119 */
2120
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002121
Jim Cownie5e8470a2013-09-27 10:38:44 +00002122static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2123// Lock for task team data structures
2124static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2125
2126
2127//------------------------------------------------------------------------------
2128// __kmp_alloc_task_deque:
 2129 // Allocates a task deque for a particular thread, and initializes the necessary
2130// data structures relating to the deque. This only happens once per thread
2131// per task team since task teams are recycled.
2132// No lock is needed during allocation since each thread allocates its own
2133// deque.
2134
2135static void
2136__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2137{
2138 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2139 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2140
2141 // Initialize last stolen task field to "none"
2142 thread_data -> td.td_deque_last_stolen = -1;
2143
2144 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2145 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2146 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2147
2148 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002149 __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002150 // Allocate space for task deque, and zero the deque
2151 // Cannot use __kmp_thread_calloc() because threads not around for
2152 // kmp_reap_task_team( ).
2153 thread_data -> td.td_deque = (kmp_taskdata_t **)
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002154 __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2155 thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002156}
2157
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002158//------------------------------------------------------------------------------
2159// __kmp_realloc_task_deque:
2160// Re-allocates a task deque for a particular thread, copies the content from the old deque
2161// and adjusts the necessary data structures relating to the deque.
 2162 // This operation must be done with the deque_lock held
2163
2164static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2165{
2166 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2167 kmp_int32 new_size = 2 * size;
2168
2169 KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
2170 __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
2171
2172 kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
2173
2174 int i,j;
2175 for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
2176 new_deque[j] = thread_data->td.td_deque[i];
2177
2178 __kmp_free(thread_data->td.td_deque);
2179
2180 thread_data -> td.td_deque_head = 0;
2181 thread_data -> td.td_deque_tail = size;
2182 thread_data -> td.td_deque = new_deque;
2183 thread_data -> td.td_deque_size = new_size;
2184}
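// A worked example of the copy-with-wrap above (values hypothetical): with
// size == 4, mask == 3, head == 2 and a full deque laid out as
//
//   old index :  0   1   2   3
//   old entry : T2  T3  T0  T1     (T0, the oldest, sits at the head)
//
// the loop copies T0,T1,T2,T3 into new indices 0..3, then sets head = 0,
// tail = 4 (== the old size) and td_deque_size = 8, so the grown deque is
// half full and the next push lands at index 4.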
Jim Cownie5e8470a2013-09-27 10:38:44 +00002185
2186//------------------------------------------------------------------------------
2187// __kmp_free_task_deque:
2188// Deallocates a task deque for a particular thread.
2189// Happens at library deallocation so don't need to reset all thread data fields.
2190
2191static void
2192__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2193{
2194 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2195
2196 if ( thread_data -> td.td_deque != NULL ) {
2197 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2198 __kmp_free( thread_data -> td.td_deque );
2199 thread_data -> td.td_deque = NULL;
2200 }
2201 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2202
2203#ifdef BUILD_TIED_TASK_STACK
2204 // GEH: Figure out what to do here for td_susp_tied_tasks
2205 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2206 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2207 }
2208#endif // BUILD_TIED_TASK_STACK
2209}
2210
2211
2212//------------------------------------------------------------------------------
2213// __kmp_realloc_task_threads_data:
2214// Allocates a threads_data array for a task team, either by allocating an initial
2215// array or enlarging an existing array. Only the first thread to get the lock
 2216 // allocates or enlarges the array and re-initializes the array elements.
2217// That thread returns "TRUE", the rest return "FALSE".
2218// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2219// The current size is given by task_team -> tt.tt_max_threads.
2220
2221static int
2222__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2223{
2224 kmp_thread_data_t ** threads_data_p;
2225 kmp_int32 nthreads, maxthreads;
2226 int is_init_thread = FALSE;
2227
2228 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2229 // Already reallocated and initialized.
2230 return FALSE;
2231 }
2232
2233 threads_data_p = & task_team -> tt.tt_threads_data;
2234 nthreads = task_team -> tt.tt_nproc;
2235 maxthreads = task_team -> tt.tt_max_threads;
2236
2237 // All threads must lock when they encounter the first task of the implicit task
2238 // region to make sure threads_data fields are (re)initialized before used.
2239 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2240
2241 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2242 // first thread to enable tasking
2243 kmp_team_t *team = thread -> th.th_team;
2244 int i;
2245
2246 is_init_thread = TRUE;
2247 if ( maxthreads < nthreads ) {
2248
2249 if ( *threads_data_p != NULL ) {
2250 kmp_thread_data_t *old_data = *threads_data_p;
2251 kmp_thread_data_t *new_data = NULL;
2252
2253 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2254 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2255 __kmp_gtid_from_thread( thread ), task_team,
2256 nthreads, maxthreads ) );
2257 // Reallocate threads_data to have more elements than current array
2258 // Cannot use __kmp_thread_realloc() because threads not around for
2259 // kmp_reap_task_team( ). Note all new array entries are initialized
2260 // to zero by __kmp_allocate().
2261 new_data = (kmp_thread_data_t *)
2262 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2263 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002264 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002265 (void *) old_data,
 2266 maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002267
2268#ifdef BUILD_TIED_TASK_STACK
2269 // GEH: Figure out if this is the right thing to do
2270 for (i = maxthreads; i < nthreads; i++) {
2271 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2272 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2273 }
2274#endif // BUILD_TIED_TASK_STACK
2275 // Install the new data and free the old data
2276 (*threads_data_p) = new_data;
2277 __kmp_free( old_data );
2278 }
2279 else {
2280 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2281 "threads data for task_team %p, size = %d\n",
2282 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2283 // Make the initial allocate for threads_data array, and zero entries
2284 // Cannot use __kmp_thread_calloc() because threads not around for
2285 // kmp_reap_task_team( ).
2286 *threads_data_p = (kmp_thread_data_t *)
2287 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2288#ifdef BUILD_TIED_TASK_STACK
2289 // GEH: Figure out if this is the right thing to do
2290 for (i = 0; i < nthreads; i++) {
2291 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2292 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2293 }
2294#endif // BUILD_TIED_TASK_STACK
2295 }
2296 task_team -> tt.tt_max_threads = nthreads;
2297 }
2298 else {
2299 // If array has (more than) enough elements, go ahead and use it
2300 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2301 }
2302
2303 // initialize threads_data pointers back to thread_info structures
2304 for (i = 0; i < nthreads; i++) {
2305 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2306 thread_data -> td.td_thr = team -> t.t_threads[i];
2307
2308 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2309 // The last stolen field survives across teams / barrier, and the number
2310 // of threads may have changed. It's possible (likely?) that a new
2311 // parallel region will exhibit the same behavior as the previous region.
2312 thread_data -> td.td_deque_last_stolen = -1;
2313 }
2314 }
2315
2316 KMP_MB();
2317 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2318 }
2319
2320 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2321 return is_init_thread;
2322}
2323
2324
2325//------------------------------------------------------------------------------
2326// __kmp_free_task_threads_data:
2327// Deallocates a threads_data array for a task team, including any attached
2328// tasking deques. Only occurs at library shutdown.
2329
2330static void
2331__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2332{
2333 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2334 if ( task_team -> tt.tt_threads_data != NULL ) {
2335 int i;
2336 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2337 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2338 }
2339 __kmp_free( task_team -> tt.tt_threads_data );
2340 task_team -> tt.tt_threads_data = NULL;
2341 }
2342 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2343}
2344
2345
2346//------------------------------------------------------------------------------
2347// __kmp_allocate_task_team:
2348// Allocates a task team associated with a specific team, taking it from
2349// the global task team free list if possible. Also initializes data structures.
2350
2351static kmp_task_team_t *
2352__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2353{
2354 kmp_task_team_t *task_team = NULL;
2355 int nthreads;
2356
2357 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2358 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2359
2360 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2361 // Take a task team from the task team pool
2362 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2363 if (__kmp_free_task_teams != NULL) {
2364 task_team = __kmp_free_task_teams;
2365 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2366 task_team -> tt.tt_next = NULL;
2367 }
2368 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2369 }
2370
2371 if (task_team == NULL) {
2372 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2373 "task team for team %p\n",
2374 __kmp_gtid_from_thread( thread ), team ) );
2375 // Allocate a new task team if one is not available.
2376 // Cannot use __kmp_thread_malloc() because threads not around for
2377 // kmp_reap_task_team( ).
2378 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2379 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2380 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2381 //task_team -> tt.tt_max_threads = 0;
2382 //task_team -> tt.tt_next = NULL;
2383 }
2384
2385 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002386#if OMP_41_ENABLED
2387 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2388#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002389 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2390
Jim Cownie5e8470a2013-09-27 10:38:44 +00002391 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2392 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002393
Jonathan Peyton54127982015-11-04 21:37:48 +00002394 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2395 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396 return task_team;
2397}
2398
2399
2400//------------------------------------------------------------------------------
2401// __kmp_free_task_team:
2402// Frees the task team associated with a specific thread, and adds it
2403// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404
Jonathan Peyton54127982015-11-04 21:37:48 +00002405void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002406__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2407{
2408 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2409 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2410
Jim Cownie5e8470a2013-09-27 10:38:44 +00002411 // Put task team back on free list
2412 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2413
2414 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2415 task_team -> tt.tt_next = __kmp_free_task_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002416 TCW_PTR(__kmp_free_task_teams, task_team);
2417
2418 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2419}
2420
2421
2422//------------------------------------------------------------------------------
2423// __kmp_reap_task_teams:
2424// Free all the task teams on the task team free list.
2425// Should only be done during library shutdown.
2426// Cannot do anything that needs a thread structure or gtid since they are already gone.
2427
2428void
2429__kmp_reap_task_teams( void )
2430{
2431 kmp_task_team_t *task_team;
2432
2433 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2434 // Free all task_teams on the free list
2435 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2436 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2437 __kmp_free_task_teams = task_team -> tt.tt_next;
2438 task_team -> tt.tt_next = NULL;
2439
2440 // Free threads_data if necessary
2441 if ( task_team -> tt.tt_threads_data != NULL ) {
2442 __kmp_free_task_threads_data( task_team );
2443 }
2444 __kmp_free( task_team );
2445 }
2446 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2447 }
2448}
2449
Jim Cownie5e8470a2013-09-27 10:38:44 +00002450//------------------------------------------------------------------------------
2451// __kmp_wait_to_unref_task_teams:
2452// Some threads could still be in the fork barrier release code, possibly
2453// trying to steal tasks. Wait for each thread to unreference its task team.
2454//
2455void
2456__kmp_wait_to_unref_task_teams(void)
2457{
2458 kmp_info_t *thread;
2459 kmp_uint32 spins;
2460 int done;
2461
2462 KMP_INIT_YIELD( spins );
2463
Jim Cownie5e8470a2013-09-27 10:38:44 +00002464 for (;;) {
2465 done = TRUE;
2466
 2467        // TODO: GEH - this may be wrong because some sync would be necessary
2468 // in case threads are added to the pool during the traversal.
2469 // Need to verify that lock for thread pool is held when calling
2470 // this routine.
2471 for (thread = (kmp_info_t *)__kmp_thread_pool;
2472 thread != NULL;
2473 thread = thread->th.th_next_pool)
2474 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002475#if KMP_OS_WINDOWS
2476 DWORD exit_val;
2477#endif
2478 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2479 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2480 __kmp_gtid_from_thread( thread ) ) );
2481 continue;
2482 }
2483#if KMP_OS_WINDOWS
2484 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2485 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002486 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002487 continue;
2488 }
2489#endif
2490
2491 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2492
2493 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2494 __kmp_gtid_from_thread( thread ) ) );
2495
2496 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002497 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002498 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002499 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002500 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2501 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002502 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002503 }
2504 }
2505 }
2506 if (done) {
2507 break;
2508 }
2509
2510 // If we are oversubscribed,
2511 // or have waited a bit (and library mode is throughput), yield.
2512 // Pause is in the following code.
2513 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2514 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2515 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002516}
2517
2518
2519//------------------------------------------------------------------------------
2520// __kmp_task_team_setup: Create a task_team for the current team, but use
2521// an already created, unused one if it already exists.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002522void
Jonathan Peyton54127982015-11-04 21:37:48 +00002523__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002524{
2525 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2526
Jonathan Peyton54127982015-11-04 21:37:48 +00002527 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2528 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
Jonathan Peyton61118492016-05-20 19:03:38 +00002529 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002530 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002531 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002532 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002533 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002534 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002535
Jonathan Peyton61118492016-05-20 19:03:38 +00002536 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
Jonathan Peyton54127982015-11-04 21:37:48 +00002537 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2538 // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
Jonathan Peyton61118492016-05-20 19:03:38 +00002539 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
Jonathan Peyton54127982015-11-04 21:37:48 +00002540 // serialized teams.
Jonathan Peytone1dad192015-11-30 20:05:13 +00002541 if (team->t.t_nproc > 1) {
2542 int other_team = 1 - this_thr->th.th_task_state;
2543 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2544 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2545 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2546 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2547 ((team != NULL) ? team->t.t_id : -1), other_team ));
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002548 }
Jonathan Peytone1dad192015-11-30 20:05:13 +00002549 else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2550 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2551 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2552 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2553 TCW_4(task_team->tt.tt_found_tasks, FALSE);
2554#if OMP_41_ENABLED
2555 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2556#endif
2557 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2558 TCW_4(task_team->tt.tt_active, TRUE );
2559 }
2560 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2561 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2562 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2563 ((team != NULL) ? team->t.t_id : -1), other_team ));
2564 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002565 }
2566}
2567
2568
2569//------------------------------------------------------------------------------
2570// __kmp_task_team_sync: Propagation of task team data from team to threads
2571// which happens just after the release phase of a team barrier. This may be
2572// called by any thread, but only for teams with # threads > 1.
2573
2574void
2575__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2576{
2577 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2578
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002579 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002580 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
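    // E.g. th_task_state alternates 0 -> 1 -> 0 across successive barriers, so each thread
    // alternates between team->t.t_task_team[0] and team->t.t_task_team[1], the pair set up
    // by __kmp_task_team_setup.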
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002581 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2582 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jonathan Peyton54127982015-11-04 21:37:48 +00002583 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002584 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2585 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002586}
2587
2588
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002589//--------------------------------------------------------------------------------------------
2590// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
Jonathan Peyton54127982015-11-04 21:37:48 +00002591// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2592// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2593// optionally as the last argument. When wait is zero, master thread does not wait for
2594// unfinished_threads to reach 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002595void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002596__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002597 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jonathan Peyton54127982015-11-04 21:37:48 +00002598 , int wait)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002599{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002600 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002601
2602 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2603 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2604
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002605 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002606 if (wait) {
2607 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2608 __kmp_gtid_from_thread(this_thr), task_team));
2609 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2610 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2611 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2612 flag.wait(this_thr, TRUE
2613 USE_ITT_BUILD_ARG(itt_sync_obj));
2614 }
2615 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2616 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2617 "setting active to false, setting local and team's pointer to NULL\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002618 __kmp_gtid_from_thread(this_thr), task_team));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002619#if OMP_41_ENABLED
2620 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2621 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2622#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002624#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002625 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2626 KMP_MB();
2627
2628 TCW_PTR(this_thr->th.th_task_team, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629 }
2630}
2631
2632
2633//------------------------------------------------------------------------------
2634// __kmp_tasking_barrier:
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002635// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002636// Internal function to execute all tasks prior to a regular barrier or a
2637// join barrier. It is a full barrier itself, which unfortunately turns
2638// regular barriers into double barriers and join barriers into 1 1/2
2639// barriers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002640void
2641__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2642{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002643 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002644 int flag = FALSE;
2645 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2646
2647#if USE_ITT_BUILD
2648 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2649#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002650 kmp_flag_32 spin_flag(spin, 0U);
2651 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2652 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002653#if USE_ITT_BUILD
2654 // TODO: What about itt_sync_obj??
2655 KMP_FSYNC_SPIN_PREPARE( spin );
2656#endif /* USE_ITT_BUILD */
2657
2658 if( TCR_4(__kmp_global.g.g_done) ) {
2659 if( __kmp_global.g.g_abort )
2660 __kmp_abort_thread( );
2661 break;
2662 }
2663 KMP_YIELD( TRUE ); // GH: We always yield here
2664 }
2665#if USE_ITT_BUILD
2666 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2667#endif /* USE_ITT_BUILD */
2668}
2669
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002670
2671#if OMP_41_ENABLED
2672
 2673/* __kmp_give_task puts a task into a given thread's queue if:
Jonathan Peytonff684e42016-02-11 22:58:29 +00002674 - the queue for that thread was created
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002675 - there's space in that queue
2676
2677 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2678 */
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002679static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002680{
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002681 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002682 kmp_task_team_t * task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002683
2684 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2685
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002686 // If task_team is NULL something went really bad...
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002687 KMP_DEBUG_ASSERT( task_team != NULL );
2688
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002689 bool result = false;
2690 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2691
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002692 if (thread_data -> td.td_deque == NULL ) {
2693 // There's no queue in this thread, go find another one
2694 // We're guaranteed that at least one thread has a queue
2695 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2696 return result;
2697 }
2698
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002699 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002700 {
2701 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002702
2703 // if this deque is bigger than the pass ratio give a chance to another thread
2704 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
2705
2706 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2707 __kmp_realloc_task_deque(thread,thread_data);
2708
2709 } else {
2710
2711 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2712
2713 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2714 {
2715 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2716
2717 // if this deque is bigger than the pass ratio give a chance to another thread
2718 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
2719 goto release_and_exit;
2720
2721 __kmp_realloc_task_deque(thread,thread_data);
2722 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002723 }
2724
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002725 // lock is held here, and there is space in the deque
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002726
2727 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2728 // Wrap index.
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002729 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002730 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2731
2732 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002733 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002734
2735release_and_exit:
2736 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2737
2738 return result;
2739}
2740
2741
 2742/* The finish of a proxy task is divided in two pieces:
 2743 - the top half is the one that can be done from a thread outside the team
 2744 - the bottom half must be run from a thread within the team
 2745
 2746 In order to run the bottom half, the task gets queued back into one of the threads of the team.
 2747 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
 2748 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
 2749 - things that can be run before queuing the bottom half
 2750 - things that must be run after queuing the bottom half
 2751
 2752 This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this
 2753 we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2754*/
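/* Concretely, in the code below: __kmp_first_top_half_finish_proxy increments
   td_incomplete_child_tasks of the proxy task, __kmp_bottom_half_finish_proxy spins until that
   counter drops back to zero, and __kmp_second_top_half_finish_proxy decrements it, so the
   bottom half cannot free the task before the second top half has run. */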
2755
2756static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2757{
2758 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2759 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2760 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2761 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2762
2763 taskdata -> td_flags.complete = 1; // mark the task as completed
2764
2765 if ( taskdata->td_taskgroup )
2766 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2767
 2768    // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
Paul Osmialowski52bef532016-05-07 00:00:00 +00002769 TCI_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002770}
2771
2772static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2773{
2774 kmp_int32 children = 0;
2775
2776 // Predecrement simulated by "- 1" calculation
2777 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2778 KMP_DEBUG_ASSERT( children >= 0 );
2779
 2780    // Remove the imaginary child
Paul Osmialowski52bef532016-05-07 00:00:00 +00002781 TCD_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002782}
2783
2784static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2785{
2786 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2787 kmp_info_t * thread = __kmp_threads[ gtid ];
2788
2789 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2790 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2791
2792 // We need to wait to make sure the top half is finished
2793 // Spinning here should be ok as this should happen quickly
2794 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2795
2796 __kmp_release_deps(gtid,taskdata);
2797 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2798}
2799
2800/*!
2801@ingroup TASKING
2802@param gtid Global Thread ID of encountering thread
 2803@param ptask Task whose execution is completed
 2804
 2805Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
2806*/
2807void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2808{
2809 KMP_DEBUG_ASSERT( ptask != NULL );
2810 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2811 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2812
2813 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2814
2815 __kmp_first_top_half_finish_proxy(taskdata);
2816 __kmp_second_top_half_finish_proxy(taskdata);
2817 __kmp_bottom_half_finish_proxy(gtid,ptask);
2818
2819 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2820}
2821
2822/*!
2823@ingroup TASKING
 2824@param ptask Task whose execution is completed
 2825
 2826Execute the completion of a proxy task from a thread that need not belong to the team.
2827*/
2828void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2829{
2830 KMP_DEBUG_ASSERT( ptask != NULL );
2831 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2832
2833 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2834
2835 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2836
2837 __kmp_first_top_half_finish_proxy(taskdata);
2838
Jonathan Peytonff684e42016-02-11 22:58:29 +00002839    // Enqueue the task so that its bottom half completion runs on a thread within the corresponding team
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002840 kmp_team_t * team = taskdata->td_team;
2841 kmp_int32 nthreads = team->t.t_nproc;
2842 kmp_info_t *thread;
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002843
2844 //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2845 kmp_int32 start_k = 0;
2846 kmp_int32 pass = 1;
2847 kmp_int32 k = start_k;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002848
2849 do {
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002850 //For now we're just linearly trying to find a thread
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002851 thread = team->t.t_threads[k];
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002852 k = (k+1) % nthreads;
2853
2854 // we did a full pass through all the threads
2855 if ( k == start_k ) pass = pass << 1;
2856
2857 } while ( !__kmp_give_task( thread, k, ptask, pass ) );
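    // Note on the pass mechanism: on the first pass (pass == 1) a full deque is never grown,
    // since its size is already >= 1x the initial size, so we simply move on to the next thread;
    // each unsuccessful full pass over the threads doubles 'pass', which then lets
    // __kmp_give_task realloc deques whose size is still below pass * INITIAL_TASK_DEQUE_SIZE.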
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002858
2859 __kmp_second_top_half_finish_proxy(taskdata);
2860
2861 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2862}
2863
Jonathan Peyton283a2152016-03-02 22:47:51 +00002864//---------------------------------------------------------------------------------
2865// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2866//
2867// thread: allocating thread
2868// task_src: pointer to source task to be duplicated
2869// returns: a pointer to the allocated kmp_task_t structure (task).
2870kmp_task_t *
2871__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2872{
2873 kmp_task_t *task;
2874 kmp_taskdata_t *taskdata;
2875 kmp_taskdata_t *taskdata_src;
2876 kmp_taskdata_t *parent_task = thread->th.th_current_task;
2877 size_t shareds_offset;
2878 size_t task_size;
2879
2880 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2881 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2882 KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
2883 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2884 task_size = taskdata_src->td_size_alloc;
2885
2886 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
2887 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2888 #if USE_FAST_MEMORY
2889 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2890 #else
2891 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2892 #endif /* USE_FAST_MEMORY */
2893 KMP_MEMCPY(taskdata, taskdata_src, task_size);
2894
2895 task = KMP_TASKDATA_TO_TASK(taskdata);
2896
2897 // Initialize new task (only specific fields not affected by memcpy)
2898 taskdata->td_task_id = KMP_GEN_TASK_ID();
 2899    if( task->shareds != NULL ) { // need to set up shareds pointer
2900 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2901 task->shareds = &((char*)taskdata)[shareds_offset];
2902 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2903 }
2904 taskdata->td_alloc_thread = thread;
2905 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2906
2907 // Only need to keep track of child task counts if team parallel and tasking not serialized
2908 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2909 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2910 if ( parent_task->td_taskgroup )
2911 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
 2912        // Only need to keep track of allocated child tasks for explicit tasks since implicit tasks are not deallocated
2913 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2914 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2915 }
2916
2917 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2918 thread, taskdata, taskdata->td_parent) );
2919#if OMPT_SUPPORT
2920 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2921#endif
2922 return task;
2923}
2924
 2925// Routine optionally generated by the compiler for setting the lastprivate flag
2926// and calling needed constructors for private/firstprivate objects
2927// (used to form taskloop tasks from pattern task)
2928typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
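// Illustrative sketch only (an assumption, not code generated by this runtime): a compiler-produced
// routine matching p_task_dup_t could look like
//     static void __outlined_task_dup( kmp_task_t *dst, kmp_task_t *src, kmp_int32 lastpriv ) {
//         // copy-construct the firstprivate objects stored with dst and, if lastpriv is set,
//         // mark dst so that it performs the lastprivate copy-out when it runs
//     }
// The name __outlined_task_dup and its body are hypothetical, used here only to document the typedef.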
2929
2930//---------------------------------------------------------------------------------
2931// __kmp_taskloop_linear: Start tasks of the taskloop linearly
2932//
2933// loc Source location information
2934// gtid Global thread ID
2935// task Task with whole loop iteration range
2936// lb Pointer to loop lower bound
2937// ub Pointer to loop upper bound
2938// st Loop stride
2939// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
2940// grainsize Schedule value if specified
2941// task_dup Tasks duplication routine
2942void
2943__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
2944 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
2945 int sched, kmp_uint64 grainsize, void *task_dup )
2946{
2947 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
2948 kmp_uint64 tc;
2949 kmp_uint64 lower = *lb; // compiler provides global bounds here
2950 kmp_uint64 upper = *ub;
Samuel Antao11e4c532016-03-12 00:55:17 +00002951 kmp_uint64 i, num_tasks = 0, extras = 0;
Jonathan Peyton283a2152016-03-02 22:47:51 +00002952 kmp_info_t *thread = __kmp_threads[gtid];
2953 kmp_taskdata_t *current_task = thread->th.th_current_task;
2954 kmp_task_t *next_task;
2955 kmp_int32 lastpriv = 0;
2956 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
2957 size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
2958
2959 // compute trip count
2960 if ( st == 1 ) { // most common case
2961 tc = upper - lower + 1;
2962 } else if ( st < 0 ) {
2963 tc = (lower - upper) / (-st) + 1;
2964 } else { // st > 0
2965 tc = (upper - lower) / st + 1;
2966 }
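    // Worked examples (illustrative): lb=0, ub=9,  st=1  -> tc = 10;
    //                                 lb=0, ub=10, st=3  -> tc = (10-0)/3 + 1 = 4 (iterations 0,3,6,9);
    //                                 lb=9, ub=0,  st=-3 -> tc = (9-0)/3 + 1 = 4 (iterations 9,6,3,0).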
2967 if(tc == 0) {
2968 // free the pattern task and exit
2969 __kmp_task_start( gtid, task, current_task );
2970 // do not execute anything for zero-trip loop
2971 __kmp_task_finish( gtid, task, current_task );
2972 return;
2973 }
2974
2975 // compute num_tasks/grainsize based on the input provided
2976 switch( sched ) {
2977 case 0: // no schedule clause specified, we can choose the default
2978 // let's try to schedule (team_size*10) tasks
2979 grainsize = thread->th.th_team_nproc * 10;
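        // no break: fall through to the num_tasks case, treating the value just computed as num_tasks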
2980 case 2: // num_tasks provided
2981 if( grainsize > tc ) {
2982 num_tasks = tc; // too big num_tasks requested, adjust values
2983 grainsize = 1;
2984 extras = 0;
2985 } else {
2986 num_tasks = grainsize;
2987 grainsize = tc / num_tasks;
2988 extras = tc % num_tasks;
2989 }
2990 break;
2991 case 1: // grainsize provided
2992 if( grainsize > tc ) {
2993 num_tasks = 1; // too big grainsize requested, adjust values
2994 grainsize = tc;
2995 extras = 0;
2996 } else {
2997 num_tasks = tc / grainsize;
2998 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
2999 extras = tc % num_tasks;
3000 }
3001 break;
3002 default:
3003 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3004 }
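    // Worked example (illustrative): tc = 10 with a grainsize clause of 3 gives
    // num_tasks = 10/3 = 3, grainsize re-adjusted to 10/3 = 3, extras = 10%3 = 1,
    // so the chunks get 4, 3 and 3 iterations (3*3 + 1 == 10).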
3005 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3006 KMP_DEBUG_ASSERT(num_tasks > extras);
3007 KMP_DEBUG_ASSERT(num_tasks > 0);
3008
3009 // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3010 for( i = 0; i < num_tasks; ++i ) {
3011 kmp_uint64 chunk_minus_1;
3012 if( extras == 0 ) {
3013 chunk_minus_1 = grainsize - 1;
3014 } else {
3015 chunk_minus_1 = grainsize;
3016 --extras; // first extras iterations get bigger chunk (grainsize+1)
3017 }
3018 upper = lower + st * chunk_minus_1;
3019 if( i == num_tasks - 1 ) {
3020 // schedule the last task, set lastprivate flag
3021 lastpriv = 1;
3022#if KMP_DEBUG
3023 if( st == 1 )
3024 KMP_DEBUG_ASSERT(upper == *ub);
3025 else if( st > 0 )
3026 KMP_DEBUG_ASSERT(upper+st > *ub);
3027 else
3028 KMP_DEBUG_ASSERT(upper+st < *ub);
3029#endif
3030 }
3031 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3032 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3033 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3034 if( ptask_dup != NULL )
 3035            ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3036 __kmp_omp_task(gtid, next_task, true); // schedule new task
3037 lower = upper + st; // adjust lower bound for the next iteration
3038 }
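    // Continuing the illustrative example above (lb=0, ub=9, st=1, num_tasks=3, grainsize=3,
    // extras=1): the loop launches tasks covering [0..3], [4..6] and [7..9], with the lastprivate
    // flag set only on the last one.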
3039 // free the pattern task and exit
3040 __kmp_task_start( gtid, task, current_task );
3041 // do not execute the pattern task, just do bookkeeping
3042 __kmp_task_finish( gtid, task, current_task );
3043}
3044
3045/*!
3046@ingroup TASKING
3047@param loc Source location information
3048@param gtid Global thread ID
3049@param task Task structure
3050@param if_val Value of the if clause
3051@param lb Pointer to loop lower bound
3052@param ub Pointer to loop upper bound
3053@param st Loop stride
3054@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3055@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3056@param grainsize Schedule value if specified
3057@param task_dup Tasks duplication routine
3058
3059Execute the taskloop construct.
3060*/
3061void
3062__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3063 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3064 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3065{
3066 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3067 KMP_DEBUG_ASSERT( task != NULL );
3068
3069 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3070 gtid, taskdata, *lb, *ub, st, grainsize, sched));
3071
 3072    // check the if clause value first
3073 if( if_val == 0 ) { // if(0) specified, mark task as serial
3074 taskdata->td_flags.task_serial = 1;
3075 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3076 }
3077 if( nogroup == 0 ) {
3078 __kmpc_taskgroup( loc, gtid );
3079 }
3080
3081 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3082 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3083 }
3084
3085 if( nogroup == 0 ) {
3086 __kmpc_end_taskgroup( loc, gtid );
3087 }
3088 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3089}
3090
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003091#endif