/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    if (!flag) return;
    // Attempt to wake up a thread: examine its type and call appropriate template
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( entries & TASK_STACK_INDEX_MASK == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack  -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top  = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block  -> sb_prev = stack_block;
            new_block  -> sb_next = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
// __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t * task_team = thread->th.th_task_team;
    kmp_int32 tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
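
// Illustrative note, not part of the original source: td_deque is used as a
// fixed-size ring buffer of TASK_DEQUE_SIZE entries, so the tail update above,
// ( td_deque_tail + 1 ) & TASK_DEQUE_MASK, assumes TASK_DEQUE_SIZE is a power
// of two with TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1; e.g. with a hypothetical
// size of 256, a tail of 255 wraps back to slot 0.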


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if tasking serialized
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}
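
// Illustrative walk-through, not part of the original source: td_allocated_child_tasks
// starts at 1 for the task itself (see __kmp_task_alloc) and is incremented once per
// child allocated, so a task with two allocated children carries a count of 3.  Its own
// completion and the freeing of each child each decrement the count, and only the
// decrement that reaches 0 actually deallocates the task; the loop above then retries
// the same test on the parent.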

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to check the counter so that the task structure is not freed prematurely
        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
        KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
                       gtid, counter, taskdata ) );
        if ( counter > 0 ) {
            // untied task is not done, to be continued possibly by other thread, do not free it now
            if (resumed_task == NULL) {
                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
            }
            thread->th.th_current_task = resumed_task;  // restore current_task
            resumed_task->td_flags.executing = 1;  // resume previous task
            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
                          gtid, taskdata, resumed_task) );
            return;
        }
    }

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag will be reset to 1 again by this same function.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the task's destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released,
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}
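
// Illustrative note, not part of the original source: td_untied_count acts as a simple
// reference count for untied tasks.  It is incremented whenever an untied task is
// scheduled (__kmp_push_task, __kmpc_omp_task_begin_if0) and decremented in
// __kmp_task_finish above; while the count stays above zero the task has only reached
// a scheduling point and may be continued later, possibly by another thread, so its
// storage is not released.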

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task.  This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref:  reference to source location of parallel region
// this_thr:  thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task   = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//  task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness    = TASK_TIED;
    task->td_flags.tasktype    = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy       = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started     = 1;
    task->td_flags.executing   = 1;
    task->td_flags.complete    = 0;
    task->td_flags.freed       = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks  == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a multiple of the power of two specified by val
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val
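
// Illustrative arithmetic, not part of the original source: assuming val is a power
// of two, __kmp_round_up_to_val( 30, 8 ) yields 32, while a size that is already a
// multiple of val, e.g. __kmp_round_up_to_val( 32, 8 ), is returned unchanged.
// __kmp_task_alloc() below relies on this to pad shareds_offset to pointer alignment.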


//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;      // AC: Always start with 0 part id

    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
    taskdata->td_level = parent_task->td_level + 1; // increment nesting level
    taskdata->td_untied_count = 0;
    taskdata->td_ident = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
    taskdata->td_task_team         = thread->th.th_task_team;
    taskdata->td_size_alloc        = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started     = 0;
    taskdata->td_flags.executing   = 0;
    taskdata->td_flags.complete    = 0;
    taskdata->td_flags.freed       = 0;

    taskdata->td_flags.native      = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1; // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}


kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}
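
// Minimal usage sketch, illustrative only and not generated by this file: a compiler
// outlining "#pragma omp task" typically allocates the thunk and then hands it to one
// of the scheduling entry points below, roughly
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /* flags, e.g. tied */ 1,
//                         sizeof(kmp_task_t) + sizeof_privates, sizeof_shareds, &entry );
//     /* fill t->shareds with pointers to the shared variables */
//     __kmpc_omp_task_parts( &loc, gtid, t );   // or a wrapper built on __kmp_omp_task
// The private-block layout and the names loc/entry/sizeof_privates are compiler-side
// assumptions, not definitions made by this file.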

//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_uint64 cur_time;
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if(__kmp_forkjoin_frames_mode == 3) {
        // Get the current time stamp to measure task execution time to correct barrier imbalance time
        cur_time = __itt_get_timestamp();
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
    __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
        KMP_COUNT_BLOCK(TASK_executed);
        KMP_TIME_BLOCK (OMP_task);
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're about to run this task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                current_task->ompt_task_info.task_id,
                taskdata->ompt_task_info.task_id);
        }
#endif

#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're returning to the callee task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                taskdata->ompt_task_info.task_id,
                current_task->ompt_task_info.task_id);
        }
#endif

#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED


#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_finish( gtid, task, current_task );

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - correct arrive time after the task finished
    if(__kmp_forkjoin_frames_mode == 3) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        if(this_thr->th.th_bar_arrive_time) {
            this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
        }
    }
#endif
    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}
1277
1278//-----------------------------------------------------------------------
1279// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1280//
1281// loc_ref: location of original task pragma (ignored)
1282// gtid: Global Thread ID of encountering thread
1283// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1284// Returns:
 1285 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
 1286 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1287
1288kmp_int32
1289__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1290{
1291 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1292
1293 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1294 gtid, loc_ref, new_taskdata ) );
1295
1296 /* Should we execute the new task or queue it? For now, let's just always try to
1297 queue it. If the queue fills up, then we'll execute it. */
1298
1299 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1300 { // Execute this task immediately
1301 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1302 new_taskdata->td_flags.task_serial = 1;
1303 __kmp_invoke_task( gtid, new_task, current_task );
1304 }
1305
1306 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1307 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1308 new_taskdata ) );
1309
1310 return TASK_CURRENT_NOT_QUEUED;
1311}
1312
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001313//---------------------------------------------------------------------
1314// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1315// gtid: Global Thread ID of encountering thread
1316// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
 1317 // serialize_immediate: if TRUE and the task is executed immediately, its execution is serialized
1318// returns:
1319//
 1320 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
 1321 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1322kmp_int32
1323__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1324{
1325 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1326
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001327#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001328 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001329 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1330 __builtin_frame_address(0);
1331 }
1332#endif
1333
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001334 /* Should we execute the new task or queue it? For now, let's just always try to
1335 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001336#if OMP_41_ENABLED
1337 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1338#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001339 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001340#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001341 { // Execute this task immediately
1342 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1343 if ( serialize_immediate )
1344 new_taskdata -> td_flags.task_serial = 1;
1345 __kmp_invoke_task( gtid, new_task, current_task );
1346 }
1347
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001348#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001349 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001350 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1351 }
1352#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001353
1354 return TASK_CURRENT_NOT_QUEUED;
1355}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001356
1357//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001358// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1359// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001360// loc_ref: location of original task pragma (ignored)
1361// gtid: Global Thread ID of encountering thread
1362// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1363// returns:
1364//
 1365 // TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
 1366 // TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1367
1368kmp_int32
1369__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1370{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001371 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001372
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001373#if KMP_DEBUG
1374 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1375#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001376 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1377 gtid, loc_ref, new_taskdata ) );
1378
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001379 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001380
1381 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1382 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001383 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001384}
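// [Illustrative sketch -- not part of the runtime] A compiler front end is
// expected to pair this entry point with __kmpc_omp_task_alloc(): allocate the
// task thunk, fill in its shared data, then hand it to __kmpc_omp_task(). The
// exact __kmpc_omp_task_alloc() signature lives in kmp.h; the helper names and
// the "one shared int" layout below are assumptions made only for illustration.
#if 0
// Outlined task body; the runtime invokes it as (*task->routine)(gtid, task).
static kmp_int32 example_task_entry( kmp_int32 gtid, void *task_ )
{
    kmp_task_t *task = (kmp_task_t *)task_;
    int *shared_x = *(int **)task->shareds;   // the single shared variable
    *shared_x += 1;
    return 0;
}

// Rough shape of the code generated for "#pragma omp task" over "x += 1".
static void example_lowering( ident_t *loc, kmp_int32 gtid, int *x )
{
    kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, 1 /* tied */,
                                           sizeof(kmp_task_t), sizeof(int *),
                                           (kmp_routine_entry_t)&example_task_entry );
    *(int **)t->shareds = x;          // publish the shared variable
    __kmpc_omp_task( loc, gtid, t );  // defer the task (or run it immediately)
}
#endif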
1385
Jim Cownie5e8470a2013-09-27 10:38:44 +00001386//-------------------------------------------------------------------------------------
1387// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1388
1389kmp_int32
1390__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1391{
1392 kmp_taskdata_t * taskdata;
1393 kmp_info_t * thread;
1394 int thread_finished = FALSE;
1395
Jonathan Peyton54127982015-11-04 21:37:48 +00001396 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001397
1398 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1399 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1400
1401 thread = __kmp_threads[ gtid ];
1402 taskdata = thread -> th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001403
1404#if OMPT_SUPPORT && OMPT_TRACE
1405 ompt_task_id_t my_task_id;
1406 ompt_parallel_id_t my_parallel_id;
1407
1408 if (ompt_enabled) {
1409 kmp_team_t *team = thread->th.th_team;
1410 my_task_id = taskdata->ompt_task_info.task_id;
1411 my_parallel_id = team->t.ompt_team_info.parallel_id;
1412
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001413 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001414 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1415 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1416 my_parallel_id, my_task_id);
1417 }
1418 }
1419#endif
1420
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421#if USE_ITT_BUILD
1422 // Note: These values are used by ITT events as well.
1423#endif /* USE_ITT_BUILD */
1424 taskdata->td_taskwait_counter += 1;
1425 taskdata->td_taskwait_ident = loc_ref;
1426 taskdata->td_taskwait_thread = gtid + 1;
1427
1428#if USE_ITT_BUILD
1429 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1430 if ( itt_sync_obj != NULL )
1431 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1432#endif /* USE_ITT_BUILD */
1433
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001434#if OMP_41_ENABLED
1435 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1436#else
1437 if ( ! taskdata->td_flags.team_serial )
1438#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001439 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001440 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001441 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001442 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001443 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1444 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001445 }
1446 }
1447#if USE_ITT_BUILD
1448 if ( itt_sync_obj != NULL )
1449 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1450#endif /* USE_ITT_BUILD */
1451
1452 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1453 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001454
1455#if OMPT_SUPPORT && OMPT_TRACE
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001456 if (ompt_enabled) {
1457 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1458 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001459 my_parallel_id, my_task_id);
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001460 }
1461 taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001462 }
1463#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001464 }
1465
1466 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1467 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1468
1469 return TASK_CURRENT_NOT_QUEUED;
1470}
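// [Illustrative sketch -- not part of the runtime] A "#pragma omp taskwait"
// is expected to lower to a single call of the entry point above; the waiting
// thread does not block but keeps executing queued tasks until its children
// complete. Hypothetical expansion (gtid obtained the usual way, e.g. via
// __kmpc_global_thread_num()):
#if 0
__kmpc_omp_taskwait( &loc, gtid );
#endif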
1471
1472
1473//-------------------------------------------------
1474// __kmpc_omp_taskyield: switch to a different task
1475
1476kmp_int32
1477__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1478{
1479 kmp_taskdata_t * taskdata;
1480 kmp_info_t * thread;
1481 int thread_finished = FALSE;
1482
Jonathan Peyton45be4502015-08-11 21:36:41 +00001483 KMP_COUNT_BLOCK(OMP_TASKYIELD);
1484
Jim Cownie5e8470a2013-09-27 10:38:44 +00001485 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1486 gtid, loc_ref, end_part) );
1487
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001488 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001489 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1490
1491 thread = __kmp_threads[ gtid ];
1492 taskdata = thread -> th.th_current_task;
1493 // Should we model this as a task wait or not?
1494#if USE_ITT_BUILD
1495 // Note: These values are used by ITT events as well.
1496#endif /* USE_ITT_BUILD */
1497 taskdata->td_taskwait_counter += 1;
1498 taskdata->td_taskwait_ident = loc_ref;
1499 taskdata->td_taskwait_thread = gtid + 1;
1500
1501#if USE_ITT_BUILD
1502 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1503 if ( itt_sync_obj != NULL )
1504 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1505#endif /* USE_ITT_BUILD */
1506 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001507 kmp_task_team_t * task_team = thread->th.th_task_team;
1508 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001509 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001510 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1511 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1512 }
1513 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001514 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001515#if USE_ITT_BUILD
1516 if ( itt_sync_obj != NULL )
1517 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1518#endif /* USE_ITT_BUILD */
1519
1520 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1521 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1522 }
1523
1524 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1525 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1526
1527 return TASK_CURRENT_NOT_QUEUED;
1528}
1529
1530
1531#if OMP_40_ENABLED
1532//-------------------------------------------------------------------------------------
1533// __kmpc_taskgroup: Start a new taskgroup
1534
1535void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001536__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001537{
1538 kmp_info_t * thread = __kmp_threads[ gtid ];
1539 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1540 kmp_taskgroup_t * tg_new =
1541 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1542 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1543 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001544 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001545 tg_new->parent = taskdata->td_taskgroup;
1546 taskdata->td_taskgroup = tg_new;
1547}
1548
1549
1550//-------------------------------------------------------------------------------------
1551// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1552// and its descendants are complete
1553
1554void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001555__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001556{
1557 kmp_info_t * thread = __kmp_threads[ gtid ];
1558 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1559 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1560 int thread_finished = FALSE;
1561
1562 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1563 KMP_DEBUG_ASSERT( taskgroup != NULL );
1564
1565 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1566#if USE_ITT_BUILD
1567 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1568 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1569 if ( itt_sync_obj != NULL )
1570 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1571#endif /* USE_ITT_BUILD */
1572
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001573#if OMP_41_ENABLED
1574 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1575#else
1576 if ( ! taskdata->td_flags.team_serial )
1577#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001578 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001579 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001580 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001581 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1582 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001583 }
1584 }
1585
1586#if USE_ITT_BUILD
1587 if ( itt_sync_obj != NULL )
1588 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1589#endif /* USE_ITT_BUILD */
1590 }
1591 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1592
1593 // Restore parent taskgroup for the current task
1594 taskdata->td_taskgroup = taskgroup->parent;
1595 __kmp_thread_free( thread, taskgroup );
1596
1597 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1598}
1599#endif
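// [Illustrative sketch -- not part of the runtime] A "#pragma omp taskgroup"
// region is expected to be bracketed by the two entry points above. Because
// each kmp_taskgroup_t records its predecessor in ->parent, nested taskgroups
// form a per-task stack that is pushed in __kmpc_taskgroup and popped in
// __kmpc_end_taskgroup. Hypothetical expansion:
#if 0
__kmpc_taskgroup( &loc, gtid );
/* ... structured block: tasks created here bump the group's count ... */
__kmpc_end_taskgroup( &loc, gtid );   // waits until the group's count is 0
#endif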
1600
1601
1602//------------------------------------------------------
1603// __kmp_remove_my_task: remove a task from my own deque
1604
1605static kmp_task_t *
1606__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1607 kmp_int32 is_constrained )
1608{
1609 kmp_task_t * task;
1610 kmp_taskdata_t * taskdata;
1611 kmp_thread_data_t *thread_data;
1612 kmp_uint32 tail;
1613
1614 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1615 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1616
1617 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1618
1619 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1620 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1621 thread_data->td.td_deque_tail) );
1622
1623 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1624 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1625 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1626 thread_data->td.td_deque_tail) );
1627 return NULL;
1628 }
1629
1630 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1631
1632 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1633 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1634 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1635 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1636 thread_data->td.td_deque_tail) );
1637 return NULL;
1638 }
1639
1640 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1641 taskdata = thread_data -> td.td_deque[ tail ];
1642
1643 if (is_constrained) {
 1644 // we need to check if the candidate obeys the task scheduling constraint:
 1645 // only a child of the current task can be scheduled
1646 kmp_taskdata_t * current = thread->th.th_current_task;
1647 kmp_int32 level = current->td_level;
1648 kmp_taskdata_t * parent = taskdata->td_parent;
1649 while ( parent != current && parent->td_level > level ) {
1650 parent = parent->td_parent; // check generation up to the level of the current task
1651 KMP_DEBUG_ASSERT(parent != NULL);
1652 }
1653 if ( parent != current ) {
 1654 // If the tail task is not a child, then no other children can appear in the deque.
1655 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1656 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1657 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1658 thread_data->td.td_deque_tail) );
1659 return NULL;
1660 }
1661 }
1662
1663 thread_data -> td.td_deque_tail = tail;
1664 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1665
1666 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1667
1668 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1669 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1670 thread_data->td.td_deque_tail) );
1671
1672 task = KMP_TASKDATA_TO_TASK( taskdata );
1673 return task;
1674}
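// [Illustrative sketch -- not part of the runtime] The deque above is a
// power-of-two ring buffer, so "& TASK_DEQUE_MASK" performs the wrap-around
// that a modulo would otherwise do: the owner pushes and pops at the tail
// (LIFO, for locality) while thieves take from the head. A single-threaded
// model of the owner-side pop, with hypothetical names and without the
// td_deque_lock protection the real code uses:
#if 0
enum { DEMO_DEQUE_SIZE = 256, DEMO_DEQUE_MASK = DEMO_DEQUE_SIZE - 1 };

typedef struct {
    void       *slots[ DEMO_DEQUE_SIZE ];
    kmp_uint32  head;      // thieves steal from here
    kmp_uint32  tail;      // owner pushes and pops here
    kmp_int32   ntasks;
} demo_deque_t;

static void * demo_pop_tail( demo_deque_t *d )
{
    if ( d->ntasks == 0 )
        return NULL;                               // nothing to pop
    d->tail = ( d->tail - 1 ) & DEMO_DEQUE_MASK;   // wrap index
    d->ntasks -= 1;
    return d->slots[ d->tail ];
}
#endif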
1675
1676
1677//-----------------------------------------------------------
1678// __kmp_steal_task: remove a task from another thread's deque
1679// Assume that calling thread has already checked existence of
1680// task_team thread_data before calling this routine.
1681
1682static kmp_task_t *
1683__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1684 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1685 kmp_int32 is_constrained )
1686{
1687 kmp_task_t * task;
1688 kmp_taskdata_t * taskdata;
1689 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001690 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001691
1692 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1693
1694 threads_data = task_team -> tt.tt_threads_data;
1695 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1696
1697 victim_tid = victim->th.th_info.ds.ds_tid;
1698 victim_td = & threads_data[ victim_tid ];
1699
1700 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1701 "head=%u tail=%u\n",
1702 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1703 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1704
1705 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1706 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1707 {
1708 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1709 "ntasks=%d head=%u tail=%u\n",
1710 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1711 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1712 return NULL;
1713 }
1714
1715 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1716
1717 // Check again after we acquire the lock
1718 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1719 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1720 {
1721 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1722 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1723 "ntasks=%d head=%u tail=%u\n",
1724 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1725 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1726 return NULL;
1727 }
1728
1729 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1730
1731 if ( !is_constrained ) {
1732 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1733 // Bump head pointer and Wrap.
1734 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1735 } else {
1736 // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1737 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1738 taskdata = victim_td -> td.td_deque[ tail ];
 1739 // we need to check if the candidate obeys the task scheduling constraint:
 1740 // only a child of the current task can be scheduled
1741 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1742 kmp_int32 level = current->td_level;
1743 kmp_taskdata_t * parent = taskdata->td_parent;
1744 while ( parent != current && parent->td_level > level ) {
1745 parent = parent->td_parent; // check generation up to the level of the current task
1746 KMP_DEBUG_ASSERT(parent != NULL);
1747 }
1748 if ( parent != current ) {
 1749 // If the tail task is not a child, then no other children can appear in the deque (?).
1750 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1751 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1752 "ntasks=%d head=%u tail=%u\n",
1753 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1754 task_team, victim_td->td.td_deque_ntasks,
1755 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1756 return NULL;
1757 }
1758 victim_td -> td.td_deque_tail = tail;
1759 }
1760 if (*thread_finished) {
1761 // We need to un-mark this victim as a finished victim. This must be done before
1762 // releasing the lock, or else other threads (starting with the master victim)
1763 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001764 kmp_uint32 count;
1765
1766 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001767
1768 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1769 gtid, count + 1, task_team) );
1770
1771 *thread_finished = FALSE;
1772 }
1773 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1774
1775 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1776
Jonathan Peyton45be4502015-08-11 21:36:41 +00001777 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001778 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001779 "ntasks=%d head=%u tail=%u\n",
1780 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1781 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1782 victim_td->td.td_deque_tail) );
1783
1784 task = KMP_TASKDATA_TO_TASK( taskdata );
1785 return task;
1786}
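// [Illustrative sketch -- not part of the runtime] The constrained branch above
// enforces the task scheduling constraint by walking the candidate's ancestor
// chain: the candidate may be taken only if the current task shows up among its
// ancestors at or above the current task's nesting level. The same test,
// restated as a stand-alone predicate with a hypothetical name:
#if 0
static int demo_obeys_constraint( kmp_taskdata_t *candidate, kmp_taskdata_t *current )
{
    kmp_int32       level  = current->td_level;
    kmp_taskdata_t *parent = candidate->td_parent;
    while ( parent != current && parent->td_level > level ) {
        parent = parent->td_parent;   // climb toward shallower ancestors
    }
    return parent == current;         // TRUE only if current is an ancestor
}
#endif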
1787
1788
1789//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001790// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001791 // is satisfied (return true) or there are none left (return false).
1792// final_spin is TRUE if this is the spin at the release barrier.
1793// thread_finished indicates whether the thread is finished executing all
1794// the tasks it has on its deque, and is at the release barrier.
1795// spinner is the location on which to spin.
1796// spinner == NULL means only execute a single task and return.
1797// checker is the value to check to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798template <class C>
1799static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1800 int *thread_finished
1801 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001802{
1803 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001804 kmp_thread_data_t * threads_data;
1805 kmp_task_t * task;
1806 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1807 volatile kmp_uint32 * unfinished_threads;
1808 kmp_int32 nthreads, last_stolen, k, tid;
1809
1810 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1811 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1812
1813 task_team = thread -> th.th_task_team;
Jonathan Peyton54127982015-11-04 21:37:48 +00001814 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001815
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001816 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001817 gtid, final_spin, *thread_finished) );
1818
1819 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1820 KMP_DEBUG_ASSERT( threads_data != NULL );
1821
1822 nthreads = task_team -> tt.tt_nproc;
1823 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001824#if OMP_41_ENABLED
1825 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1826#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001827 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001828#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001829 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1830
1831 // Choose tasks from our own work queue.
1832 start:
1833 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1834#if USE_ITT_BUILD && USE_ITT_NOTIFY
1835 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1836 if ( itt_sync_obj == NULL ) {
1837 // we are at fork barrier where we could not get the object reliably
1838 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1839 }
1840 __kmp_itt_task_starting( itt_sync_obj );
1841 }
1842#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1843 __kmp_invoke_task( gtid, task, current_task );
1844#if USE_ITT_BUILD
1845 if ( itt_sync_obj != NULL )
1846 __kmp_itt_task_finished( itt_sync_obj );
1847#endif /* USE_ITT_BUILD */
1848
1849 // If this thread is only partway through the barrier and the condition
1850 // is met, then return now, so that the barrier gather/release pattern can proceed.
1851 // If this thread is in the last spin loop in the barrier, waiting to be
 1852 // released, we know that the termination condition will not be satisfied,
1853 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001854 if (flag == NULL || (!final_spin && flag->done_check())) {
1855 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001856 return TRUE;
1857 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001858 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001859 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1860 }
1861
1862 // This thread's work queue is empty. If we are in the final spin loop
1863 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001864#if OMP_41_ENABLED
1865 // The work queue may be empty but there might be proxy tasks still executing
1866 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1867#else
1868 if (final_spin)
1869#endif
1870 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001871 // First, decrement the #unfinished threads, if that has not already
1872 // been done. This decrement might be to the spin location, and
1873 // result in the termination condition being satisfied.
1874 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001875 kmp_uint32 count;
1876
1877 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001878 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001879 gtid, count, task_team) );
1880 *thread_finished = TRUE;
1881 }
1882
1883 // It is now unsafe to reference thread->th.th_team !!!
1884 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1885 // thread to pass through the barrier, where it might reset each thread's
1886 // th.th_team field for the next parallel region.
1887 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001888 if (flag != NULL && flag->done_check()) {
1889 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001890 return TRUE;
1891 }
1892 }
1893
Jonathan Peyton54127982015-11-04 21:37:48 +00001894 if (thread->th.th_task_team == NULL) return FALSE;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001895#if OMP_41_ENABLED
1896 // check if there are other threads to steal from, otherwise go back
1897 if ( nthreads == 1 )
1898 goto start;
1899#endif
1900
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901 // Try to steal from the last place I stole from successfully.
1902 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1903 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1904
1905 if (last_stolen != -1) {
1906 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1907
1908 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1909 thread_finished, is_constrained )) != NULL)
1910 {
1911#if USE_ITT_BUILD && USE_ITT_NOTIFY
1912 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1913 if ( itt_sync_obj == NULL ) {
1914 // we are at fork barrier where we could not get the object reliably
1915 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1916 }
1917 __kmp_itt_task_starting( itt_sync_obj );
1918 }
1919#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1920 __kmp_invoke_task( gtid, task, current_task );
1921#if USE_ITT_BUILD
1922 if ( itt_sync_obj != NULL )
1923 __kmp_itt_task_finished( itt_sync_obj );
1924#endif /* USE_ITT_BUILD */
1925
1926 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001927 if (flag == NULL || (!final_spin && flag->done_check())) {
1928 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 gtid) );
1930 return TRUE;
1931 }
1932
Jonathan Peyton54127982015-11-04 21:37:48 +00001933 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1935 // If the execution of the stolen task resulted in more tasks being
1936 // placed on our run queue, then restart the whole process.
1937 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001938 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001939 gtid) );
1940 goto start;
1941 }
1942 }
1943
1944 // Don't give priority to stealing from this thread anymore.
1945 threads_data[ tid ].td.td_deque_last_stolen = -1;
1946
 1947 // The victim's work queue is empty. If we are in the final spin loop
1948 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001949#if OMP_41_ENABLED
1950 // The work queue may be empty but there might be proxy tasks still executing
1951 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1952#else
1953 if (final_spin)
1954#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001955 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001956 // First, decrement the #unfinished threads, if that has not already
1957 // been done. This decrement might be to the spin location, and
1958 // result in the termination condition being satisfied.
1959 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001960 kmp_uint32 count;
1961
1962 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001963 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964 "task_team=%p\n", gtid, count, task_team) );
1965 *thread_finished = TRUE;
1966 }
1967
1968 // If __kmp_tasking_mode != tskm_immediate_exec
1969 // then it is now unsafe to reference thread->th.th_team !!!
1970 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1971 // thread to pass through the barrier, where it might reset each thread's
1972 // th.th_team field for the next parallel region.
1973 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001974 if (flag != NULL && flag->done_check()) {
1975 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976 gtid) );
1977 return TRUE;
1978 }
1979 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001980 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001981 }
1982
1983 // Find a different thread to steal work from. Pick a random thread.
1984 // My initial plan was to cycle through all the threads, and only return
1985 // if we tried to steal from every thread, and failed. Arch says that's
1986 // not such a great idea.
1987 // GEH - need yield code in this loop for throughput library mode?
1988 new_victim:
1989 k = __kmp_get_random( thread ) % (nthreads - 1);
1990 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1991 ++k; // Adjusts random distribution to exclude self
1992 }
1993 {
1994 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1995 int first;
1996
1997 // There is a slight chance that __kmp_enable_tasking() did not wake up
 1998 // all threads waiting at the barrier. If the chosen victim thread is sleeping,
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001999 // then wake it up. Since we were going to pay the cache miss penalty
2000 // for referencing another thread's kmp_info_t struct anyway, the check
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001 // shouldn't cost too much performance at this point.
2002 // In extra barrier mode, tasks do not sleep at the separate tasking
2003 // barrier, so this isn't a problem.
2004 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2005 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2006 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
2007 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002008 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002009 // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00002010 // There is a slight possibility that it resumes, steals a task from
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002011 // another thread, which spawns more tasks, all in the time that it takes
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012 // this thread to check => don't write an assertion that the victim's
2013 // queue is empty. Try stealing from a different thread.
2014 goto new_victim;
2015 }
2016
2017 // Now try to steal work from the selected thread
2018 first = TRUE;
2019 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
2020 thread_finished, is_constrained )) != NULL)
2021 {
2022#if USE_ITT_BUILD && USE_ITT_NOTIFY
2023 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
2024 if ( itt_sync_obj == NULL ) {
2025 // we are at fork barrier where we could not get the object reliably
2026 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
2027 }
2028 __kmp_itt_task_starting( itt_sync_obj );
2029 }
2030#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2031 __kmp_invoke_task( gtid, task, current_task );
2032#if USE_ITT_BUILD
2033 if ( itt_sync_obj != NULL )
2034 __kmp_itt_task_finished( itt_sync_obj );
2035#endif /* USE_ITT_BUILD */
2036
2037 // Try stealing from this victim again, in the future.
2038 if (first) {
2039 threads_data[ tid ].td.td_deque_last_stolen = k;
2040 first = FALSE;
2041 }
2042
2043 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 if (flag == NULL || (!final_spin && flag->done_check())) {
2045 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002046 gtid) );
2047 return TRUE;
2048 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002049 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
2051
2052 // If the execution of the stolen task resulted in more tasks being
2053 // placed on our run queue, then restart the whole process.
2054 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002055 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002056 gtid) );
2057 goto start;
2058 }
2059 }
2060
 2061 // The victim's work queue is empty. If we are in the final spin loop
2062 // of the barrier, check and see if the termination condition is satisfied.
2063 // Going on and finding a new victim to steal from is expensive, as it
2064 // involves a lot of cache misses, so we definitely want to re-check the
2065 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002066#if OMP_41_ENABLED
2067 // The work queue may be empty but there might be proxy tasks still executing
2068 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
2069#else
2070 if (final_spin)
2071#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002072 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002073 // First, decrement the #unfinished threads, if that has not already
2074 // been done. This decrement might be to the spin location, and
2075 // result in the termination condition being satisfied.
2076 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002077 kmp_uint32 count;
2078
2079 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002080 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00002081 "task_team=%p\n",
2082 gtid, count, task_team) );
2083 *thread_finished = TRUE;
2084 }
2085
2086 // If __kmp_tasking_mode != tskm_immediate_exec,
2087 // then it is now unsafe to reference thread->th.th_team !!!
2088 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
2089 // thread to pass through the barrier, where it might reset each thread's
2090 // th.th_team field for the next parallel region.
2091 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002092 if (flag != NULL && flag->done_check()) {
2093 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002094 return TRUE;
2095 }
2096 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002097 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002098 }
2099
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002100 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002101 return FALSE;
2102}
2103
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002104int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
2105 int *thread_finished
2106 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2107{
2108 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2109 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2110}
2111
2112int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2113 int *thread_finished
2114 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2115{
2116 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2117 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2118}
2119
2120int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2121 int *thread_finished
2122 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2123{
2124 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2125 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2126}
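// [Illustrative sketch -- not part of the runtime] The three wrappers above
// simply instantiate __kmp_execute_tasks_template for the flag classes defined
// in kmp_wait_release.h (kmp_flag_32, kmp_flag_64, kmp_flag_oncore). Within
// this file the template only calls flag->done_check(), so a minimal flag-like
// type would just need that method; the class below is hypothetical and shown
// only to make the required interface explicit:
#if 0
class demo_flag {
    volatile kmp_uint32 *loc_;
    kmp_uint32           checker_;
public:
    demo_flag( volatile kmp_uint32 *loc, kmp_uint32 checker )
        : loc_( loc ), checker_( checker ) {}
    bool done_check() const { return TCR_4(*loc_) == checker_; }   // spin condition met?
};
#endif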
2127
2128
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129
2130//-----------------------------------------------------------------------------
2131// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2132// next barrier so they can assist in executing enqueued tasks.
2133// First thread in allocates the task team atomically.
2134
2135static void
2136__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2137{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138 kmp_thread_data_t *threads_data;
2139 int nthreads, i, is_init_thread;
2140
2141 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2142 __kmp_gtid_from_thread( this_thr ) ) );
2143
2144 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002145 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002146
2147 nthreads = task_team->tt.tt_nproc;
2148 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002149 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002150
2151 // Allocate or increase the size of threads_data if necessary
2152 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2153
2154 if (!is_init_thread) {
2155 // Some other thread already set up the array.
2156 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2157 __kmp_gtid_from_thread( this_thr ) ) );
2158 return;
2159 }
2160 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2161 KMP_DEBUG_ASSERT( threads_data != NULL );
2162
2163 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2164 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2165 {
2166 // Release any threads sleeping at the barrier, so that they can steal
2167 // tasks and execute them. In extra barrier mode, tasks do not sleep
2168 // at the separate tasking barrier, so this isn't a problem.
2169 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002170 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002171 kmp_info_t *thread = threads_data[i].td.td_thr;
2172
2173 if (i == this_thr->th.th_info.ds.ds_tid) {
2174 continue;
2175 }
2176 // Since we haven't locked the thread's suspend mutex lock at this
2177 // point, there is a small window where a thread might be putting
2178 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002179 // To work around this, __kmp_execute_tasks_template() periodically checks to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002180 // see if other threads are sleeping (using the same random
2181 // mechanism that is used for task stealing) and awakens them if
2182 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002183 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002184 {
2185 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2186 __kmp_gtid_from_thread( this_thr ),
2187 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002188 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189 }
2190 else {
2191 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2192 __kmp_gtid_from_thread( this_thr ),
2193 __kmp_gtid_from_thread( thread ) ) );
2194 }
2195 }
2196 }
2197
2198 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2199 __kmp_gtid_from_thread( this_thr ) ) );
2200}
2201
2202
2203/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002204/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2206 * like a shadow of the kmp_team_t data struct, with a different lifetime.
 2207 * After a child thread checks into a barrier and calls __kmp_release() from
2208 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2209 * longer assume that the kmp_team_t structure is intact (at any moment, the
2210 * master thread may exit the barrier code and free the team data structure,
2211 * and return the threads to the thread pool).
2212 *
 2213 * This does not work with the tasking code, as the thread is still
 2214 * expected to participate in the execution of any tasks that may have been
 2215 * spawned by a member of the team, and the thread still needs access to
 2216 * each thread in the team, so that it can steal work from it.
2217 *
2218 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 2219 * counting mechanism, and is allocated by the master thread before calling
 2220 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2221 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2222 * of the kmp_task_team_t structs for consecutive barriers can overlap
2223 * (and will, unless the master thread is the last thread to exit the barrier
2224 * release phase, which is not typical).
2225 *
2226 * The existence of such a struct is useful outside the context of tasking,
2227 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2228 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2229 * libraries.
2230 *
2231 * We currently use the existence of the threads array as an indicator that
2232 * tasks were spawned since the last barrier. If the structure is to be
2233 * useful outside the context of tasking, then this will have to change, but
 2234 * not setting the field minimizes the performance impact of tasking on
2235 * barriers, when no explicit tasks were spawned (pushed, actually).
2236 */
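// [Illustrative sketch -- not part of the runtime] The bare shape of the
// "last thread out releases it" scheme described in the comment above: every
// participant decrements a shared counter on its way out, and whichever thread
// drives the counter to zero knows that no one else still references the
// struct. Names are hypothetical; the real bookkeeping is spread across this
// file and the barrier code.
#if 0
static void demo_leave_task_team( kmp_task_team_t *tt, int *i_am_last )
{
    // KMP_TEST_THEN_DEC32 returns the previous value, so "previous - 1" is the
    // number of threads still inside.
    kmp_uint32 remaining =
        KMP_TEST_THEN_DEC32( (kmp_int32 *)&tt->tt.tt_unfinished_threads ) - 1;
    *i_am_last = (remaining == 0);    // last one out may recycle the task team
}
#endif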
2237
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002238
Jim Cownie5e8470a2013-09-27 10:38:44 +00002239static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2240// Lock for task team data structures
2241static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2242
2243
2244//------------------------------------------------------------------------------
2245// __kmp_alloc_task_deque:
 2246 // Allocates a task deque for a particular thread, and initializes the necessary
2247// data structures relating to the deque. This only happens once per thread
2248// per task team since task teams are recycled.
2249// No lock is needed during allocation since each thread allocates its own
2250// deque.
2251
2252static void
2253__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2254{
2255 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2256 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2257
2258 // Initialize last stolen task field to "none"
2259 thread_data -> td.td_deque_last_stolen = -1;
2260
2261 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2262 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2263 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2264
2265 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2266 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2267 // Allocate space for task deque, and zero the deque
2268 // Cannot use __kmp_thread_calloc() because threads not around for
2269 // kmp_reap_task_team( ).
2270 thread_data -> td.td_deque = (kmp_taskdata_t **)
2271 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2272}
2273
2274
2275//------------------------------------------------------------------------------
2276// __kmp_free_task_deque:
2277// Deallocates a task deque for a particular thread.
 2278 // Happens at library deallocation, so there is no need to reset all thread data fields.
2279
2280static void
2281__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2282{
2283 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2284
2285 if ( thread_data -> td.td_deque != NULL ) {
2286 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2287 __kmp_free( thread_data -> td.td_deque );
2288 thread_data -> td.td_deque = NULL;
2289 }
2290 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2291
2292#ifdef BUILD_TIED_TASK_STACK
2293 // GEH: Figure out what to do here for td_susp_tied_tasks
2294 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2295 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2296 }
2297#endif // BUILD_TIED_TASK_STACK
2298}
2299
2300
2301//------------------------------------------------------------------------------
2302// __kmp_realloc_task_threads_data:
2303// Allocates a threads_data array for a task team, either by allocating an initial
2304// array or enlarging an existing array. Only the first thread to get the lock
 2305 // allocs or enlarges the array and re-initializes the array elements.
2306// That thread returns "TRUE", the rest return "FALSE".
2307// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2308// The current size is given by task_team -> tt.tt_max_threads.
2309
2310static int
2311__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2312{
2313 kmp_thread_data_t ** threads_data_p;
2314 kmp_int32 nthreads, maxthreads;
2315 int is_init_thread = FALSE;
2316
2317 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2318 // Already reallocated and initialized.
2319 return FALSE;
2320 }
2321
2322 threads_data_p = & task_team -> tt.tt_threads_data;
2323 nthreads = task_team -> tt.tt_nproc;
2324 maxthreads = task_team -> tt.tt_max_threads;
2325
2326 // All threads must lock when they encounter the first task of the implicit task
 2327 // region to make sure threads_data fields are (re)initialized before they are used.
2328 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2329
2330 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2331 // first thread to enable tasking
2332 kmp_team_t *team = thread -> th.th_team;
2333 int i;
2334
2335 is_init_thread = TRUE;
2336 if ( maxthreads < nthreads ) {
2337
2338 if ( *threads_data_p != NULL ) {
2339 kmp_thread_data_t *old_data = *threads_data_p;
2340 kmp_thread_data_t *new_data = NULL;
2341
2342 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2343 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2344 __kmp_gtid_from_thread( thread ), task_team,
2345 nthreads, maxthreads ) );
2346 // Reallocate threads_data to have more elements than current array
2347 // Cannot use __kmp_thread_realloc() because threads not around for
2348 // kmp_reap_task_team( ). Note all new array entries are initialized
2349 // to zero by __kmp_allocate().
2350 new_data = (kmp_thread_data_t *)
2351 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2352 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002353 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002354 (void *) old_data,
 2355 maxthreads * sizeof(kmp_thread_data_t) ); // copy the existing per-thread entries in full
Jim Cownie5e8470a2013-09-27 10:38:44 +00002356
2357#ifdef BUILD_TIED_TASK_STACK
2358 // GEH: Figure out if this is the right thing to do
2359 for (i = maxthreads; i < nthreads; i++) {
2360 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2361 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2362 }
2363#endif // BUILD_TIED_TASK_STACK
2364 // Install the new data and free the old data
2365 (*threads_data_p) = new_data;
2366 __kmp_free( old_data );
2367 }
2368 else {
2369 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2370 "threads data for task_team %p, size = %d\n",
2371 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2372 // Make the initial allocate for threads_data array, and zero entries
2373 // Cannot use __kmp_thread_calloc() because threads not around for
2374 // kmp_reap_task_team( ).
2375 *threads_data_p = (kmp_thread_data_t *)
2376 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2377#ifdef BUILD_TIED_TASK_STACK
2378 // GEH: Figure out if this is the right thing to do
2379 for (i = 0; i < nthreads; i++) {
2380 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2381 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2382 }
2383#endif // BUILD_TIED_TASK_STACK
2384 }
2385 task_team -> tt.tt_max_threads = nthreads;
2386 }
2387 else {
2388 // If array has (more than) enough elements, go ahead and use it
2389 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2390 }
2391
2392 // initialize threads_data pointers back to thread_info structures
2393 for (i = 0; i < nthreads; i++) {
2394 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2395 thread_data -> td.td_thr = team -> t.t_threads[i];
2396
2397 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2398 // The last stolen field survives across teams / barrier, and the number
2399 // of threads may have changed. It's possible (likely?) that a new
2400 // parallel region will exhibit the same behavior as the previous region.
2401 thread_data -> td.td_deque_last_stolen = -1;
2402 }
2403 }
2404
2405 KMP_MB();
2406 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2407 }
2408
2409 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2410 return is_init_thread;
2411}
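// [Illustrative sketch -- not part of the runtime] The function above is a
// double-checked initialization: a cheap unsynchronized read of tt_found_tasks
// filters out the common "already set up" case, and the test is repeated under
// tt_threads_lock so that exactly one thread performs the (re)allocation. The
// bare pattern, with hypothetical names:
#if 0
typedef struct {
    volatile kmp_uint32  initialized;
    kmp_bootstrap_lock_t lock;
} demo_once_t;

static int demo_init_once( demo_once_t *s )
{
    int i_did_it = FALSE;
    if ( TCR_4(s->initialized) )
        return FALSE;                      // fast path: someone else already did it
    __kmp_acquire_bootstrap_lock( &s->lock );
    if ( ! TCR_4(s->initialized) ) {       // re-check while holding the lock
        /* ... allocate and initialize the shared state here ... */
        KMP_MB();                          // make the initialized data visible first
        TCW_SYNC_4( s->initialized, TRUE );
        i_did_it = TRUE;
    }
    __kmp_release_bootstrap_lock( &s->lock );
    return i_did_it;
}
#endif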
2412
2413
2414//------------------------------------------------------------------------------
2415// __kmp_free_task_threads_data:
2416// Deallocates a threads_data array for a task team, including any attached
2417// tasking deques. Only occurs at library shutdown.
2418
2419static void
2420__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2421{
2422 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2423 if ( task_team -> tt.tt_threads_data != NULL ) {
2424 int i;
2425 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2426 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2427 }
2428 __kmp_free( task_team -> tt.tt_threads_data );
2429 task_team -> tt.tt_threads_data = NULL;
2430 }
2431 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2432}
2433
2434
2435//------------------------------------------------------------------------------
2436// __kmp_allocate_task_team:
2437// Allocates a task team associated with a specific team, taking it from
2438// the global task team free list if possible. Also initializes data structures.
2439
2440static kmp_task_team_t *
2441__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2442{
2443 kmp_task_team_t *task_team = NULL;
2444 int nthreads;
2445
2446 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2447 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2448
2449 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2450 // Take a task team from the task team pool
2451 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2452 if (__kmp_free_task_teams != NULL) {
2453 task_team = __kmp_free_task_teams;
2454 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2455 task_team -> tt.tt_next = NULL;
2456 }
2457 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2458 }
2459
2460 if (task_team == NULL) {
2461 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2462 "task team for team %p\n",
2463 __kmp_gtid_from_thread( thread ), team ) );
2464 // Allocate a new task team if one is not available.
2465 // Cannot use __kmp_thread_malloc() because threads not around for
2466 // kmp_reap_task_team( ).
2467 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2468 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2469 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2470 //task_team -> tt.tt_max_threads = 0;
2471 //task_team -> tt.tt_next = NULL;
2472 }
2473
2474 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002475#if OMP_41_ENABLED
2476 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2477#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002478 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2479
Jim Cownie5e8470a2013-09-27 10:38:44 +00002480 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2481 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002482
Jonathan Peyton54127982015-11-04 21:37:48 +00002483 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2484 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002485 return task_team;
2486}
2487
2488
2489//------------------------------------------------------------------------------
2490// __kmp_free_task_team:
2491// Frees the task team associated with a specific thread, and adds it
2492// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493
Jonathan Peyton54127982015-11-04 21:37:48 +00002494void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002495__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2496{
2497 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2498 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2499
Jim Cownie5e8470a2013-09-27 10:38:44 +00002500 // Put task team back on free list
2501 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2502
2503 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2504 task_team -> tt.tt_next = __kmp_free_task_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505 TCW_PTR(__kmp_free_task_teams, task_team);
2506
2507 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2508}
2509
2510
2511//------------------------------------------------------------------------------
2512// __kmp_reap_task_teams:
2513// Free all the task teams on the task team free list.
2514// Should only be done during library shutdown.
2515// Cannot do anything that needs a thread structure or gtid since they are already gone.
2516
2517void
2518__kmp_reap_task_teams( void )
2519{
2520 kmp_task_team_t *task_team;
2521
2522 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2523 // Free all task_teams on the free list
2524 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2525 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2526 __kmp_free_task_teams = task_team -> tt.tt_next;
2527 task_team -> tt.tt_next = NULL;
2528
2529 // Free threads_data if necessary
2530 if ( task_team -> tt.tt_threads_data != NULL ) {
2531 __kmp_free_task_threads_data( task_team );
2532 }
2533 __kmp_free( task_team );
2534 }
2535 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2536 }
2537}
2538
Jim Cownie5e8470a2013-09-27 10:38:44 +00002539//------------------------------------------------------------------------------
2540// __kmp_wait_to_unref_task_teams:
2541// Some threads could still be in the fork barrier release code, possibly
2542// trying to steal tasks. Wait for each thread to unreference its task team.
2543//
2544void
2545__kmp_wait_to_unref_task_teams(void)
2546{
2547 kmp_info_t *thread;
2548 kmp_uint32 spins;
2549 int done;
2550
2551 KMP_INIT_YIELD( spins );
2552
Jim Cownie5e8470a2013-09-27 10:38:44 +00002553 for (;;) {
2554 done = TRUE;
2555
2556 // TODO: GEH - this may be wrong because some sync would be necessary
2557 // in case threads are added to the pool during the traversal.
2558 // Need to verify that lock for thread pool is held when calling
2559 // this routine.
2560 for (thread = (kmp_info_t *)__kmp_thread_pool;
2561 thread != NULL;
2562 thread = thread->th.th_next_pool)
2563 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002564#if KMP_OS_WINDOWS
2565 DWORD exit_val;
2566#endif
2567 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2568 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2569 __kmp_gtid_from_thread( thread ) ) );
2570 continue;
2571 }
2572#if KMP_OS_WINDOWS
2573 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2574 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002575 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002576 continue;
2577 }
2578#endif
2579
2580 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2581
2582 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2583 __kmp_gtid_from_thread( thread ) ) );
2584
2585 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002586 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002588 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002589 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2590 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002591 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002592 }
2593 }
2594 }
2595 if (done) {
2596 break;
2597 }
2598
2599 // If we are oversubscribed,
2600 // or have waited a bit (and library mode is throughput), yield.
2601 // Pause is in the following code.
2602 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2603 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2604 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002605}
2606
2607
2608//------------------------------------------------------------------------------
2609// __kmp_task_team_setup: Create a task_team for the current team, but use
2610// an already created, unused one if it already exists.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002611void
Jonathan Peyton54127982015-11-04 21:37:48 +00002612__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613{
2614 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2615
Jonathan Peyton54127982015-11-04 21:37:48 +00002616 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2617 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
2618 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002619 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002620 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002621 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002622 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002623 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002624
2625 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
2626 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2627 // previous task_team struct (above), until they receive the signal to stop checking for tasks (they can't safely
2628 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
2629 // serialized teams.
Jonathan Peytone1dad192015-11-30 20:05:13 +00002630 if (team->t.t_nproc > 1) {
2631 int other_team = 1 - this_thr->th.th_task_state;
2632 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2633 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2634 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2635 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2636 ((team != NULL) ? team->t.t_id : -1), other_team ));
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002637 }
Jonathan Peytone1dad192015-11-30 20:05:13 +00002638 else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2639 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2640 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2641 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2642 TCW_4(task_team->tt.tt_found_tasks, FALSE);
2643#if OMP_41_ENABLED
2644 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2645#endif
2646 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2647 TCW_4(task_team->tt.tt_active, TRUE );
2648 }
2649 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2650 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2651 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2652 ((team != NULL) ? team->t.t_id : -1), other_team ));
2653 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002654 }
2655}
2656
2657
2658//------------------------------------------------------------------------------
2659// __kmp_task_team_sync: Propagation of task team data from team to threads
2660// which happens just after the release phase of a team barrier. This may be
2661// called by any thread, but only for teams with # threads > 1.
2662
2663void
2664__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2665{
2666 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2667
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002668 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002669 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002670 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2671 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jonathan Peyton54127982015-11-04 21:37:48 +00002672 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002673 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2674 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675}
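// To illustrate the two-slot scheme: each thread's th_task_state selects which of
// team->t.t_task_team[0..1] it works out of during a region. __kmp_task_team_setup
// prepares the other slot for the upcoming region, __kmp_task_team_wait drains and
// deactivates the slot used by the region that is ending, and this routine flips
// th_task_state at the barrier release so threads pick up the freshly prepared slot.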
2676
2677
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002678//--------------------------------------------------------------------------------------------
2679// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
Jonathan Peyton54127982015-11-04 21:37:48 +00002680// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2681// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2682// optionally as the last argument. When wait is zero, master thread does not wait for
2683// unfinished_threads to reach 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002685__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002686 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jonathan Peyton54127982015-11-04 21:37:48 +00002687 , int wait)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002688{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002689 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002690
2691 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2692 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2693
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002694 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002695 if (wait) {
2696 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2697 __kmp_gtid_from_thread(this_thr), task_team));
2698 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2699 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2700 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2701 flag.wait(this_thr, TRUE
2702 USE_ITT_BUILD_ARG(itt_sync_obj));
2703 }
2704 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2705 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2706 "setting active to false, setting local and team's pointer to NULL\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002707 __kmp_gtid_from_thread(this_thr), task_team));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002708#if OMP_41_ENABLED
2709 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2710 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2711#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002712 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002713#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002714 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2715 KMP_MB();
2716
2717 TCW_PTR(this_thr->th.th_task_team, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002718 }
2719}
2720
2721
2722//------------------------------------------------------------------------------
2723// __kmp_tasking_barrier:
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002724// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725// Internal function to execute all tasks prior to a regular barrier or a
2726// join barrier. It is a full barrier itself, which unfortunately turns
2727// regular barriers into double barriers and join barriers into 1 1/2
2728// barriers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729void
2730__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2731{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002732 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002733 int flag = FALSE;
2734 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2735
2736#if USE_ITT_BUILD
2737 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2738#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002739 kmp_flag_32 spin_flag(spin, 0U);
2740 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2741 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002742#if USE_ITT_BUILD
2743 // TODO: What about itt_sync_obj??
2744 KMP_FSYNC_SPIN_PREPARE( spin );
2745#endif /* USE_ITT_BUILD */
2746
2747 if( TCR_4(__kmp_global.g.g_done) ) {
2748 if( __kmp_global.g.g_abort )
2749 __kmp_abort_thread( );
2750 break;
2751 }
2752 KMP_YIELD( TRUE ); // GH: We always yield here
2753 }
2754#if USE_ITT_BUILD
2755 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2756#endif /* USE_ITT_BUILD */
2757}
2758
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002759
2760#if OMP_41_ENABLED
2761
2762/* __kmp_give_task puts a task into a given thread's queue if:
Jonathan Peytonff684e42016-02-11 22:58:29 +00002763 - the queue for that thread was created
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002764 - there's space in that queue
2765
2766 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2767 */
2768static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2769{
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002770 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002771 kmp_task_team_t * task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002772
2773 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2774
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002775 // If task_team is NULL something went really bad...
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002776 KMP_DEBUG_ASSERT( task_team != NULL );
2777
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002778 bool result = false;
2779 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2780
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002781 if (thread_data -> td.td_deque == NULL ) {
2782 // There's no queue in this thread, go find another one
2783 // We're guaranteed that at least one thread has a queue
2784 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2785 return result;
2786 }
2787
2788 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2789 {
2790 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2791 return result;
2792 }
2793
2794 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2795
2796 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2797 {
2798 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2799 goto release_and_exit;
2800 }
2801
2802 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2803 // Wrap index.
2804 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2805 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2806
2807 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002808 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002809
2810release_and_exit:
2811 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2812
2813 return result;
2814}
2815
2816
2817/* The finish of a proxy task is divided in two pieces:
2818 - the top half is the one that can be done from a thread outside the team
2819 - the bottom half must be run from a thread within the team
2820
2821 In order to run the bottom half the task gets queued back into one of the threads of the team.
2822 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2823 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
2824 - things that can be run before queuing the bottom half
2825 - things that must be run after queuing the bottom half
2826
2827 This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
2828 we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2829*/
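/* The resulting call sequence, as used by __kmpc_proxy_task_completed_ooo below, is:

       __kmp_first_top_half_finish_proxy( taskdata );   // part of the top half that precedes queuing
       __kmp_give_task( thread, k, ptask );             // queue the bottom half into a team thread
       __kmp_second_top_half_finish_proxy( taskdata );  // part of the top half that follows queuing

   and later, from a thread inside the team:

       __kmp_bottom_half_finish_proxy( gtid, ptask );
*/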
2830
2831static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2832{
2833 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2834 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2835 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2836 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2837
2838 taskdata -> td_flags.complete = 1; // mark the task as completed
2839
2840 if ( taskdata->td_taskgroup )
2841 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2842
2843 // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2844 TCR_4(taskdata->td_incomplete_child_tasks++);
2845}
2846
2847static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2848{
2849 kmp_int32 children = 0;
2850
2851 // Predecrement simulated by "- 1" calculation
2852 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2853 KMP_DEBUG_ASSERT( children >= 0 );
2854
2855 // Remove the imaginary child
2856 TCR_4(taskdata->td_incomplete_child_tasks--);
2857}
2858
2859static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2860{
2861 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2862 kmp_info_t * thread = __kmp_threads[ gtid ];
2863
2864 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2865 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2866
2867 // We need to wait to make sure the top half is finished
2868 // Spinning here should be ok as this should happen quickly
2869 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2870
2871 __kmp_release_deps(gtid,taskdata);
2872 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2873}
2874
2875/*!
2876@ingroup TASKING
2877@param gtid Global Thread ID of encountering thread
2878@param ptask Task whose execution is completed
2879
2880Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
2881*/
2882void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2883{
2884 KMP_DEBUG_ASSERT( ptask != NULL );
2885 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2886 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2887
2888 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2889
2890 __kmp_first_top_half_finish_proxy(taskdata);
2891 __kmp_second_top_half_finish_proxy(taskdata);
2892 __kmp_bottom_half_finish_proxy(gtid,ptask);
2893
2894 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2895}
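/* A minimal usage sketch (hypothetical caller, not part of this runtime): a completion
   callback that happens to run on a thread of the encountering team can finish the proxy
   task directly. The callback shape and the way the kmp_task_t pointer is stored are
   assumptions made only for illustration.

       static void example_proxy_done_in_team( kmp_int32 gtid, kmp_task_t *ptask )
       {
           // gtid must identify a thread of the team that encountered the proxy task
           __kmpc_proxy_task_completed( gtid, ptask );
       }
*/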
2896
2897/*!
2898@ingroup TASKING
2899@param ptask Task whose execution is completed
2900
2901Execute the completion of a proxy task from a thread that need not belong to the team.
2902*/
2903void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2904{
2905 KMP_DEBUG_ASSERT( ptask != NULL );
2906 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2907
2908 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2909
2910 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2911
2912 __kmp_first_top_half_finish_proxy(taskdata);
2913
Jonathan Peytonff684e42016-02-11 22:58:29 +00002914 // Enqueue the task so that its bottom half completes on a thread within the corresponding team
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002915 kmp_team_t * team = taskdata->td_team;
2916 kmp_int32 nthreads = team->t.t_nproc;
2917 kmp_info_t *thread;
2918 kmp_int32 k = 0;
2919
2920 do {
Jonathan Peyton1406f012015-05-22 22:35:51 +00002921 //This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002922 //For now we're just linearly trying to find a thread
2923 k = (k+1) % nthreads;
2924 thread = team->t.t_threads[k];
2925 } while ( !__kmp_give_task( thread, k, ptask ) );
2926
2927 __kmp_second_top_half_finish_proxy(taskdata);
2928
2929 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2930}
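/* A minimal usage sketch (hypothetical, for illustration only): a thread owned by a
   device driver or other asynchronous engine, which is not part of the OpenMP team,
   signals completion of the proxy task it was handed when the work was launched. The
   callback shape and the stashed pointer are assumptions.

       static void example_async_work_done( void *data )
       {
           kmp_task_t *ptask = (kmp_task_t *)data;    // recorded when the async work was launched
           __kmpc_proxy_task_completed_ooo( ptask );  // no gtid needed; caller may be outside the team
       }
*/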
2931
Jonathan Peyton283a2152016-03-02 22:47:51 +00002932//---------------------------------------------------------------------------------
2933// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2934//
2935// thread: allocating thread
2936// task_src: pointer to source task to be duplicated
2937// returns: a pointer to the allocated kmp_task_t structure (task).
2938kmp_task_t *
2939__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2940{
2941 kmp_task_t *task;
2942 kmp_taskdata_t *taskdata;
2943 kmp_taskdata_t *taskdata_src;
2944 kmp_taskdata_t *parent_task = thread->th.th_current_task;
2945 size_t shareds_offset;
2946 size_t task_size;
2947
2948 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2949 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2950 KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
2951 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2952 task_size = taskdata_src->td_size_alloc;
2953
2954 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
2955 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2956 #if USE_FAST_MEMORY
2957 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2958 #else
2959 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2960 #endif /* USE_FAST_MEMORY */
2961 KMP_MEMCPY(taskdata, taskdata_src, task_size);
2962
2963 task = KMP_TASKDATA_TO_TASK(taskdata);
2964
2965 // Initialize new task (only specific fields not affected by memcpy)
2966 taskdata->td_task_id = KMP_GEN_TASK_ID();
2967 if( task->shareds != NULL ) { // need to set up the shareds pointer
2968 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2969 task->shareds = &((char*)taskdata)[shareds_offset];
2970 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2971 }
2972 taskdata->td_alloc_thread = thread;
2973 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2974
2975 // Only need to keep track of child task counts if team parallel and tasking not serialized
2976 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2977 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2978 if ( parent_task->td_taskgroup )
2979 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
2980 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
2981 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2982 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2983 }
2984
2985 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2986 thread, taskdata, taskdata->td_parent) );
2987#if OMPT_SUPPORT
2988 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2989#endif
2990 return task;
2991}
2992
2993// Routine optionally generated by the compiler for setting the lastprivate flag
2994// and calling needed constructors for private/firstprivate objects
2995// (used to form taskloop tasks from pattern task)
2996typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
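/* A hypothetical example of such a routine; the privates layout, the accessor and every
   name below are assumptions for illustration only, not what any particular compiler emits:

       typedef struct { my_type fp; kmp_int32 is_last; } example_privates_t;

       static void example_task_dup( kmp_task_t *dst, kmp_task_t *src, kmp_int32 lastpriv )
       {
           example_privates_t *dp = example_get_privates( dst );   // hypothetical accessor
           example_privates_t *sp = example_get_privates( src );
           my_type_copy_ctor( &dp->fp, &sp->fp );   // re-run the firstprivate copy constructor
           dp->is_last = lastpriv;                  // remember whether this chunk is the last one
       }
*/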
2997
2998//---------------------------------------------------------------------------------
2999// __kmp_taskloop_linear: Start tasks of the taskloop linearly
3000//
3001// loc Source location information
3002// gtid Global thread ID
3003// task Task with whole loop iteration range
3004// lb Pointer to loop lower bound
3005// ub Pointer to loop upper bound
3006// st Loop stride
3007// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3008// grainsize Schedule value if specified
3009// task_dup Tasks duplication routine
3010void
3011__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
3012 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3013 int sched, kmp_uint64 grainsize, void *task_dup )
3014{
3015 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
3016 kmp_uint64 tc;
3017 kmp_uint64 lower = *lb; // compiler provides global bounds here
3018 kmp_uint64 upper = *ub;
Samuel Antao11e4c532016-03-12 00:55:17 +00003019 kmp_uint64 i, num_tasks = 0, extras = 0;
Jonathan Peyton283a2152016-03-02 22:47:51 +00003020 kmp_info_t *thread = __kmp_threads[gtid];
3021 kmp_taskdata_t *current_task = thread->th.th_current_task;
3022 kmp_task_t *next_task;
3023 kmp_int32 lastpriv = 0;
3024 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
3025 size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
3026
3027 // compute trip count
3028 if ( st == 1 ) { // most common case
3029 tc = upper - lower + 1;
3030 } else if ( st < 0 ) {
3031 tc = (lower - upper) / (-st) + 1;
3032 } else { // st > 0
3033 tc = (upper - lower) / st + 1;
3034 }
3035 if(tc == 0) {
3036 // free the pattern task and exit
3037 __kmp_task_start( gtid, task, current_task );
3038 // do not execute anything for zero-trip loop
3039 __kmp_task_finish( gtid, task, current_task );
3040 return;
3041 }
3042
3043 // compute num_tasks/grainsize based on the input provided
3044 switch( sched ) {
3045 case 0: // no schedule clause specified, we can choose the default
3046 // let's try to schedule (team_size*10) tasks
3047 grainsize = thread->th.th_team_nproc * 10;
3048 case 2: // num_tasks provided
3049 if( grainsize > tc ) {
3050 num_tasks = tc; // too big num_tasks requested, adjust values
3051 grainsize = 1;
3052 extras = 0;
3053 } else {
3054 num_tasks = grainsize;
3055 grainsize = tc / num_tasks;
3056 extras = tc % num_tasks;
3057 }
3058 break;
3059 case 1: // grainsize provided
3060 if( grainsize > tc ) {
3061 num_tasks = 1; // too big grainsize requested, adjust values
3062 grainsize = tc;
3063 extras = 0;
3064 } else {
3065 num_tasks = tc / grainsize;
3066 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3067 extras = tc % num_tasks;
3068 }
3069 break;
3070 default:
3071 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3072 }
3073 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3074 KMP_DEBUG_ASSERT(num_tasks > extras);
3075 KMP_DEBUG_ASSERT(num_tasks > 0);
3076
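    // Worked example: tc = 10 iterations with grainsize(3) (sched == 1) gives
    // num_tasks = 10/3 = 3, grainsize = 10/3 = 3, extras = 10 % 3 = 1, so the loop
    // below creates tasks of 4, 3 and 3 iterations (3*3 + 1 == 10).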
3077 // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3078 for( i = 0; i < num_tasks; ++i ) {
3079 kmp_uint64 chunk_minus_1;
3080 if( extras == 0 ) {
3081 chunk_minus_1 = grainsize - 1;
3082 } else {
3083 chunk_minus_1 = grainsize;
3084 --extras; // the first 'extras' tasks get a bigger chunk (grainsize+1 iterations)
3085 }
3086 upper = lower + st * chunk_minus_1;
3087 if( i == num_tasks - 1 ) {
3088 // schedule the last task, set lastprivate flag
3089 lastpriv = 1;
3090#if KMP_DEBUG
3091 if( st == 1 )
3092 KMP_DEBUG_ASSERT(upper == *ub);
3093 else if( st > 0 )
3094 KMP_DEBUG_ASSERT(upper+st > *ub);
3095 else
3096 KMP_DEBUG_ASSERT(upper+st < *ub);
3097#endif
3098 }
3099 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3100 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3101 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3102 if( ptask_dup != NULL )
3103 ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3104 __kmp_omp_task(gtid, next_task, true); // schedule new task
3105 lower = upper + st; // adjust lower bound for the next iteration
3106 }
3107 // free the pattern task and exit
3108 __kmp_task_start( gtid, task, current_task );
3109 // do not execute the pattern task, just do bookkeeping
3110 __kmp_task_finish( gtid, task, current_task );
3111}
3112
3113/*!
3114@ingroup TASKING
3115@param loc Source location information
3116@param gtid Global thread ID
3117@param task Task structure
3118@param if_val Value of the if clause
3119@param lb Pointer to loop lower bound
3120@param ub Pointer to loop upper bound
3121@param st Loop stride
3122@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3123@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3124@param grainsize Schedule value if specified
3125@param task_dup Tasks duplication routine
3126
3127Execute the taskloop construct.
3128*/
3129void
3130__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3131 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3132 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3133{
3134 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3135 KMP_DEBUG_ASSERT( task != NULL );
3136
3137 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3138 gtid, taskdata, *lb, *ub, st, grainsize, sched));
3139
3140 // check if clause value first
3141 if( if_val == 0 ) { // if(0) specified, mark task as serial
3142 taskdata->td_flags.task_serial = 1;
3143 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3144 }
3145 if( nogroup == 0 ) {
3146 __kmpc_taskgroup( loc, gtid );
3147 }
3148
3149 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3150 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3151 }
3152
3153 if( nogroup == 0 ) {
3154 __kmpc_end_taskgroup( loc, gtid );
3155 }
3156 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3157}
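/* A rough sketch (illustrative only) of how a compiler might lower

       #pragma omp taskloop grainsize(4)
       for ( i = 0; i < n; ++i ) body( i );

   assuming "task" is the pattern task allocated for the loop body and its lower/upper
   bound fields (task_lb, task_ub here) were filled in with 0 and n-1:

       __kmpc_taskloop( &loc, gtid, task,
                        1,                  // if-clause value: no if clause, so true
                        &task_lb, &task_ub,
                        1,                  // stride
                        0,                  // nogroup == 0: wrap in an implicit taskgroup
                        1, 4,               // sched == 1 (grainsize), grainsize value of 4
                        task_dup );         // duplication routine, or NULL if none is needed
*/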
3158
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003159#endif