/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    if (!flag) return;
    // Attempt to wake up a thread: examine its type and call the appropriate template
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( (entries & TASK_STACK_INDEX_MASK) == 0 )  // parenthesized: '==' binds tighter than '&'
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];  // owning thread, needed to free extra blocks
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                         tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )  // parenthesized: '==' binds tighter than '&'
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block   -> sb_prev = stack_block;
            new_block   -> sb_next = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                         tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )  // parenthesized: '==' binds tighter than '&'
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
//  __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room, since no thread other than the calling thread can add tasks to this deque
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}

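// A minimal, illustrative sketch (not compiled in) of why the push above can wrap the tail
// index with a bit-mask: it assumes TASK_DEQUE_SIZE is a power of two and that
// TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1, so "(tail + 1) & TASK_DEQUE_MASK" behaves like
// "(tail + 1) % TASK_DEQUE_SIZE" without a division.  The helper name is hypothetical and
// exists only for this example.
#if 0
static kmp_uint32
__kmp_example_wrap_deque_index( kmp_uint32 tail )
{
    // e.g. with TASK_DEQUE_SIZE == 256 and TASK_DEQUE_MASK == 0xFF:
    //   tail == 254 -> 255, tail == 255 -> 0 (wraps back to the start of the buffer)
    return ( tail + 1 ) & TASK_DEQUE_MASK;
}
#endif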

//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if tasking serialized
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}

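// Illustrative trace (comments only) of the reference counting used above, based on the
// conventions visible in this file: td_allocated_child_tasks starts at 1 in __kmp_task_alloc
// (the task counts itself) and is incremented once per explicit child allocated; a task is
// freed only when its count reaches 0.  The concrete tasks P, C1, C2 are hypothetical.
//
//   Explicit parent P allocates two explicit children C1 and C2:
//     P.td_allocated_child_tasks : 1 -> 2 -> 3
//   C1 finishes with no children of its own: its count 1 - 1 == 0, so C1 is freed, and the
//   walk up the ancestor tree decrements the parent:
//     P.td_allocated_child_tasks : 3 -> 2   (non-zero, so P is "not freed yet")
//   C2 finishes likewise:
//     P.td_allocated_child_tasks : 2 -> 1
//   P itself completes: its count 1 - 1 == 0, so P can finally be freed.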
//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag will be reset to 1 again by this same function.
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released,
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task. This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//  task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started   = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete  = 0;
    task->td_flags.freed     = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a multiple of val, where val is a power of two.
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val

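// A small worked example of the rounding above (illustrative values only), with
// val == sizeof(void *) == 8:
//   __kmp_round_up_to_val( 40, 8 ) == 40   (already a multiple of 8, left unchanged)
//   __kmp_round_up_to_val( 44, 8 ) == 48   (44 & 7 != 0, so clear the low bits -> 40, then add 8)
// This is what pointer-aligns the shareds block that __kmp_task_alloc() places after kmp_task_t.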

//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
               setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
        taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
        taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;  // AC: Always start with 0 part id

    taskdata->td_task_id      = KMP_GEN_TASK_ID();
    taskdata->td_team         = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent       = parent_task;
    taskdata->td_level        = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_ident        = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness   = flags->tiedness;
    taskdata->td_flags.final      = flags->final;
    taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy = flags->proxy;
    taskdata->td_task_team   = thread->th.th_task_team;
    taskdata->td_size_alloc  = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started   = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.complete  = 0;
    taskdata->td_flags.freed     = 0;

    taskdata->td_flags.native = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}

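// Illustrative sketch of the single allocation made above (field sizes and padding amounts
// depend on the target and on what the compiler packed into kmp_task_t):
//
//   +----------------+----------------------------+---------+------------------+
//   | kmp_taskdata_t | kmp_task_t + private data  | padding | shared pointers  |
//   +----------------+----------------------------+---------+------------------+
//   ^ taskdata        ^ task = KMP_TASKDATA_TO_TASK(taskdata)
//                                                           ^ task->shareds = (char *)taskdata + shareds_offset
//
// shareds_offset = __kmp_round_up_to_val( sizeof(kmp_taskdata_t) + sizeof_kmp_task_t, sizeof(void *) ),
// so the shareds block is always pointer-aligned.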

kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}

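// A rough sketch (not compiled in) of how a compiler might lower "#pragma omp task" onto the
// entry point above together with __kmpc_omp_task() defined later in this file.  The outlined
// routine, the shareds struct, and the flag value are hypothetical; only the runtime calls
// themselves come from this file.
#if 0
struct example_shareds { int *counter; };             // one pointer per shared variable

static kmp_int32
example_task_entry( kmp_int32 gtid, void *ptask )
{
    kmp_task_t *task = (kmp_task_t *)ptask;
    struct example_shareds *sh = (struct example_shareds *)task->shareds;
    (*sh->counter)++;                                  // the body of the task region
    return 0;
}

static void
example_lowering( ident_t *loc, kmp_int32 gtid, int *counter )
{
    kmp_task_t *task = __kmpc_omp_task_alloc( loc, gtid, 1 /* assumed "tied" flag encoding */,
                                              sizeof(kmp_task_t),
                                              sizeof(struct example_shareds),
                                              (kmp_routine_entry_t)&example_task_entry );
    ((struct example_shareds *)task->shareds)->counter = counter;
    __kmpc_omp_task( loc, gtid, task );                // queue it (or run it immediately)
}
#endif
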
//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_uint64 cur_time;
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if(__kmp_forkjoin_frames_mode == 3) {
        // Get the current time stamp to measure task execution time to correct barrier imbalance time
        cur_time = __itt_get_timestamp();
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
        KMP_COUNT_BLOCK(TASK_executed);
        KMP_TIME_BLOCK (OMP_task);
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're about to run this task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                current_task->ompt_task_info.task_id,
                taskdata->ompt_task_info.task_id);
        }
#endif

#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're returning to the callee task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                taskdata->ompt_task_info.task_id,
                current_task->ompt_task_info.task_id);
        }
#endif

#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED


#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_finish( gtid, task, current_task );

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - correct arrive time after the task finished
    if(__kmp_forkjoin_frames_mode == 3) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        if(this_thr->th.th_bar_arrive_time) {
            this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
        }
    }
#endif
    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}

//-----------------------------------------------------------------------
// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
//
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
// Returns:
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.

kmp_int32
__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    /* Should we execute the new task or queue it?  For now, let's just always try to
       queue it.  If the queue fills up, then we'll execute it.  */

    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )  // if cannot defer
    {   // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        new_taskdata->td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

    KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                  new_taskdata ) );

    return TASK_CURRENT_NOT_QUEUED;
}

Jim Cownie4cc4bb42014-10-07 16:25:50 +00001279//---------------------------------------------------------------------
1280// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1281// gtid: Global Thread ID of encountering thread
1282// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1283// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1284// returns:
1285//
1286//  TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
1287//  TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1288kmp_int32
1289__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1290{
1291 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1292
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001293#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001294 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001295 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1296 __builtin_frame_address(0);
1297 }
1298#endif
1299
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001300 /* Should we execute the new task or queue it? For now, let's just always try to
1301 queue it. If the queue fills up, then we'll execute it. */
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001302#if OMP_41_ENABLED
1303 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1304#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001305 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001306#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001307 { // Execute this task immediately
1308 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1309 if ( serialize_immediate )
1310 new_taskdata -> td_flags.task_serial = 1;
1311 __kmp_invoke_task( gtid, new_task, current_task );
1312 }
1313
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001314#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001315 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001316 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1317 }
1318#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001319
1320 return TASK_CURRENT_NOT_QUEUED;
1321}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001322
1323//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001324// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1325// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001326// loc_ref: location of original task pragma (ignored)
1327// gtid: Global Thread ID of encountering thread
1328// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1329// returns:
1330//
1331//  TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
1332//  TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1333
1334kmp_int32
1335__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1336{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001337 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001338
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001339#if KMP_DEBUG
1340 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1341#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001342 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1343 gtid, loc_ref, new_taskdata ) );
1344
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001345 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001346
1347 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1348 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001349 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001350}
1351
Jim Cownie5e8470a2013-09-27 10:38:44 +00001352//-------------------------------------------------------------------------------------
1353// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1354
1355kmp_int32
1356__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1357{
1358 kmp_taskdata_t * taskdata;
1359 kmp_info_t * thread;
1360 int thread_finished = FALSE;
1361
Jonathan Peyton54127982015-11-04 21:37:48 +00001362 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001363
1364 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1365 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1366
1367 thread = __kmp_threads[ gtid ];
1368 taskdata = thread -> th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001369
1370#if OMPT_SUPPORT && OMPT_TRACE
1371 ompt_task_id_t my_task_id;
1372 ompt_parallel_id_t my_parallel_id;
1373
1374 if (ompt_enabled) {
1375 kmp_team_t *team = thread->th.th_team;
1376 my_task_id = taskdata->ompt_task_info.task_id;
1377 my_parallel_id = team->t.ompt_team_info.parallel_id;
1378
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001379 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001380 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1381 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1382 my_parallel_id, my_task_id);
1383 }
1384 }
1385#endif
1386
Jim Cownie5e8470a2013-09-27 10:38:44 +00001387#if USE_ITT_BUILD
1388 // Note: These values are used by ITT events as well.
1389#endif /* USE_ITT_BUILD */
1390 taskdata->td_taskwait_counter += 1;
1391 taskdata->td_taskwait_ident = loc_ref;
1392 taskdata->td_taskwait_thread = gtid + 1;
1393
1394#if USE_ITT_BUILD
1395 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1396 if ( itt_sync_obj != NULL )
1397 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1398#endif /* USE_ITT_BUILD */
1399
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001400#if OMP_41_ENABLED
1401 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1402#else
1403 if ( ! taskdata->td_flags.team_serial )
1404#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001405 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001407 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001408 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001409 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1410 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001411 }
1412 }
1413#if USE_ITT_BUILD
1414 if ( itt_sync_obj != NULL )
1415 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1416#endif /* USE_ITT_BUILD */
1417
1418 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1419 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001420
1421#if OMPT_SUPPORT && OMPT_TRACE
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001422 if (ompt_enabled) {
1423 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1424 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001425 my_parallel_id, my_task_id);
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001426 }
1427 taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001428 }
1429#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001430 }
1431
1432 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1433 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1434
1435 return TASK_CURRENT_NOT_QUEUED;
1436}
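
//
// Illustrative sketch (editorial addition, not part of the runtime): user-level
// OpenMP code that a compiler lowers onto the tasking entry points in this file
// (each "#pragma omp task" becomes a call such as __kmpc_omp_task(), and
// "#pragma omp taskwait" becomes __kmpc_omp_taskwait() above, which spins on
// td_incomplete_child_tasks while helping to execute queued tasks). The exact
// lowering is compiler-dependent; this only shows the user-facing pattern.
#if 0
#include <omp.h>
#include <stdio.h>

static int fib(int n)
{
    int x, y;
    if (n < 2)
        return n;
    #pragma omp task shared(x) firstprivate(n)
    x = fib(n - 1);
    #pragma omp task shared(y) firstprivate(n)
    y = fib(n - 2);
    #pragma omp taskwait          /* wait only for the two child tasks above */
    return x + y;
}

int main(void)
{
    int result = 0;
    #pragma omp parallel
    #pragma omp single
    result = fib(20);             /* one thread generates tasks, all may run them */
    printf("fib(20) = %d\n", result);
    return 0;
}
#endif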
1437
1438
1439//-------------------------------------------------
1440// __kmpc_omp_taskyield: switch to a different task
1441
1442kmp_int32
1443__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1444{
1445 kmp_taskdata_t * taskdata;
1446 kmp_info_t * thread;
1447 int thread_finished = FALSE;
1448
Jonathan Peyton45be4502015-08-11 21:36:41 +00001449 KMP_COUNT_BLOCK(OMP_TASKYIELD);
1450
Jim Cownie5e8470a2013-09-27 10:38:44 +00001451 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1452 gtid, loc_ref, end_part) );
1453
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001454 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001455 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1456
1457 thread = __kmp_threads[ gtid ];
1458 taskdata = thread -> th.th_current_task;
1459 // Should we model this as a task wait or not?
1460#if USE_ITT_BUILD
1461 // Note: These values are used by ITT events as well.
1462#endif /* USE_ITT_BUILD */
1463 taskdata->td_taskwait_counter += 1;
1464 taskdata->td_taskwait_ident = loc_ref;
1465 taskdata->td_taskwait_thread = gtid + 1;
1466
1467#if USE_ITT_BUILD
1468 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1469 if ( itt_sync_obj != NULL )
1470 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1471#endif /* USE_ITT_BUILD */
1472 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001473 kmp_task_team_t * task_team = thread->th.th_task_team;
1474 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001475 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001476 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1477 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1478 }
1479 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001480 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001481#if USE_ITT_BUILD
1482 if ( itt_sync_obj != NULL )
1483 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1484#endif /* USE_ITT_BUILD */
1485
1486 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1487 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1488 }
1489
1490 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1491 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1492
1493 return TASK_CURRENT_NOT_QUEUED;
1494}
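
//
// Illustrative sketch (editorial addition): how "#pragma omp taskyield" maps
// onto __kmpc_omp_taskyield() above. The directive is only a scheduling hint;
// as the code shows, the runtime may run other queued tasks at that point or
// do nothing at all when tasking is serialized. The flag below is a made-up
// placeholder for whatever condition the application is waiting on.
#if 0
#include <omp.h>

extern volatile int resource_free;    /* assumed external completion flag */

void wait_for_resource(void)
{
    while (!resource_free) {
        /* Let the runtime schedule other tasks while this one spins. */
        #pragma omp taskyield
    }
}
#endif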
1495
1496
1497#if OMP_40_ENABLED
1498//-------------------------------------------------------------------------------------
1499// __kmpc_taskgroup: Start a new taskgroup
1500
1501void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001502__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001503{
1504 kmp_info_t * thread = __kmp_threads[ gtid ];
1505 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1506 kmp_taskgroup_t * tg_new =
1507 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1508 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1509 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001510 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001511 tg_new->parent = taskdata->td_taskgroup;
1512 taskdata->td_taskgroup = tg_new;
1513}
1514
1515
1516//-------------------------------------------------------------------------------------
1517// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1518// and its descendants are complete
1519
1520void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001521__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001522{
1523 kmp_info_t * thread = __kmp_threads[ gtid ];
1524 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1525 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1526 int thread_finished = FALSE;
1527
1528 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1529 KMP_DEBUG_ASSERT( taskgroup != NULL );
1530
1531 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1532#if USE_ITT_BUILD
1533 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1534 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1535 if ( itt_sync_obj != NULL )
1536 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1537#endif /* USE_ITT_BUILD */
1538
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001539#if OMP_41_ENABLED
1540 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1541#else
1542 if ( ! taskdata->td_flags.team_serial )
1543#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001544 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001545 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001546 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001547 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1548 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001549 }
1550 }
1551
1552#if USE_ITT_BUILD
1553 if ( itt_sync_obj != NULL )
1554 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1555#endif /* USE_ITT_BUILD */
1556 }
1557 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1558
1559 // Restore parent taskgroup for the current task
1560 taskdata->td_taskgroup = taskgroup->parent;
1561 __kmp_thread_free( thread, taskgroup );
1562
1563 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1564}
1565#endif
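
//
// Illustrative sketch (editorial addition): a user-level taskgroup that lowers
// onto __kmpc_taskgroup()/__kmpc_end_taskgroup() above. Unlike taskwait, the
// end of a taskgroup waits for all descendant tasks (tracked by the
// taskgroup->count counter), not just the direct children. Call this from
// inside a parallel/single region to get actual parallelism.
#if 0
#include <omp.h>

void scale_all(int *data, int n)
{
    #pragma omp taskgroup
    {
        for (int i = 0; i < n; i++) {
            #pragma omp task firstprivate(i)
            data[i] *= 2;         /* each element handled by its own task */
        }
    } /* implicit wait here for every task created in the group, including
         tasks that those tasks create themselves */
}
#endif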
1566
1567
1568//------------------------------------------------------
1569// __kmp_remove_my_task: remove a task from my own deque
1570
1571static kmp_task_t *
1572__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1573 kmp_int32 is_constrained )
1574{
1575 kmp_task_t * task;
1576 kmp_taskdata_t * taskdata;
1577 kmp_thread_data_t *thread_data;
1578 kmp_uint32 tail;
1579
1580 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1581 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1582
1583 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1584
1585 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1586 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1587 thread_data->td.td_deque_tail) );
1588
1589 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1590 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1591 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1592 thread_data->td.td_deque_tail) );
1593 return NULL;
1594 }
1595
1596 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1597
1598 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1599 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1600 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1601 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1602 thread_data->td.td_deque_tail) );
1603 return NULL;
1604 }
1605
1606 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1607 taskdata = thread_data -> td.td_deque[ tail ];
1608
1609 if (is_constrained) {
1610        // we need to check if the candidate obeys the task scheduling constraint:
1611        // only a child of the current task can be scheduled
1612 kmp_taskdata_t * current = thread->th.th_current_task;
1613 kmp_int32 level = current->td_level;
1614 kmp_taskdata_t * parent = taskdata->td_parent;
1615 while ( parent != current && parent->td_level > level ) {
1616 parent = parent->td_parent; // check generation up to the level of the current task
1617 KMP_DEBUG_ASSERT(parent != NULL);
1618 }
1619 if ( parent != current ) {
1620            // If the tail task is not a child, then no other children can appear in the deque.
1621 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1622 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1623 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1624 thread_data->td.td_deque_tail) );
1625 return NULL;
1626 }
1627 }
1628
1629 thread_data -> td.td_deque_tail = tail;
1630 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1631
1632 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1633
1634 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1635 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1636 thread_data->td.td_deque_tail) );
1637
1638 task = KMP_TASKDATA_TO_TASK( taskdata );
1639 return task;
1640}
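
//
// Illustrative sketch (editorial addition): the deque discipline used by
// __kmp_remove_my_task() above and __kmp_steal_task() below, reduced to its
// core. The owner pushes and pops at the tail (newest task first, good
// locality); a thief normally takes from the head (the oldest tasks), and only
// under the scheduling constraint does it, too, look at the tail. Indices wrap
// with a power-of-two mask, mirroring TASK_DEQUE_MASK. Locking, the ntasks
// re-check, and the constraint test are omitted here.
#if 0
#define SKETCH_DEQUE_SIZE  256                  /* must be a power of two */
#define SKETCH_DEQUE_MASK  (SKETCH_DEQUE_SIZE - 1)

typedef struct {
    void     *slots[SKETCH_DEQUE_SIZE];
    unsigned  head;                             /* next slot a thief takes      */
    unsigned  tail;                             /* next free slot for the owner */
    int       ntasks;
} sketch_deque_t;

static int owner_pop(sketch_deque_t *d, void **out)     /* cf. remove_my_task */
{
    if (d->ntasks == 0) return 0;
    d->tail = (d->tail - 1) & SKETCH_DEQUE_MASK;        /* wrap index */
    *out = d->slots[d->tail];
    d->ntasks--;
    return 1;
}

static int thief_steal(sketch_deque_t *d, void **out)   /* cf. steal_task */
{
    if (d->ntasks == 0) return 0;
    *out = d->slots[d->head];
    d->head = (d->head + 1) & SKETCH_DEQUE_MASK;        /* bump head and wrap */
    d->ntasks--;
    return 1;
}
#endif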
1641
1642
1643//-----------------------------------------------------------
1644// __kmp_steal_task: remove a task from another thread's deque
1645// Assumes that the calling thread has already checked the existence of the
1646// task_team's thread_data before calling this routine.
1647
1648static kmp_task_t *
1649__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1650 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1651 kmp_int32 is_constrained )
1652{
1653 kmp_task_t * task;
1654 kmp_taskdata_t * taskdata;
1655 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001656 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001657
1658 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1659
1660 threads_data = task_team -> tt.tt_threads_data;
1661 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1662
1663 victim_tid = victim->th.th_info.ds.ds_tid;
1664 victim_td = & threads_data[ victim_tid ];
1665
1666 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1667 "head=%u tail=%u\n",
1668 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1669 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1670
1671 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1672 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1673 {
1674 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1675 "ntasks=%d head=%u tail=%u\n",
1676 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1677 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1678 return NULL;
1679 }
1680
1681 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1682
1683 // Check again after we acquire the lock
1684 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1685 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1686 {
1687 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1688 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1689 "ntasks=%d head=%u tail=%u\n",
1690 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1691 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1692 return NULL;
1693 }
1694
1695 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1696
1697 if ( !is_constrained ) {
1698 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1699 // Bump head pointer and Wrap.
1700 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1701 } else {
1702        // While there are postponed tasks, steal from the tail of the deque (smaller tasks)
1703 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1704 taskdata = victim_td -> td.td_deque[ tail ];
1705        // we need to check if the candidate obeys the task scheduling constraint:
1706        // only a child of the current task can be scheduled
1707 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1708 kmp_int32 level = current->td_level;
1709 kmp_taskdata_t * parent = taskdata->td_parent;
1710 while ( parent != current && parent->td_level > level ) {
1711 parent = parent->td_parent; // check generation up to the level of the current task
1712 KMP_DEBUG_ASSERT(parent != NULL);
1713 }
1714 if ( parent != current ) {
1715            // If the tail task is not a child, then no other children can appear in the deque (?).
1716 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1717 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1718 "ntasks=%d head=%u tail=%u\n",
1719 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1720 task_team, victim_td->td.td_deque_ntasks,
1721 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1722 return NULL;
1723 }
1724 victim_td -> td.td_deque_tail = tail;
1725 }
1726 if (*thread_finished) {
1727 // We need to un-mark this victim as a finished victim. This must be done before
1728 // releasing the lock, or else other threads (starting with the master victim)
1729 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001730 kmp_uint32 count;
1731
1732 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001733
1734 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1735 gtid, count + 1, task_team) );
1736
1737 *thread_finished = FALSE;
1738 }
1739 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1740
1741 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1742
Jonathan Peyton45be4502015-08-11 21:36:41 +00001743 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001744 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745 "ntasks=%d head=%u tail=%u\n",
1746 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1747 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1748 victim_td->td.td_deque_tail) );
1749
1750 task = KMP_TASKDATA_TO_TASK( taskdata );
1751 return task;
1752}
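
//
// Illustrative sketch (editorial addition): the task scheduling constraint test
// that appears inline in both __kmp_remove_my_task() and __kmp_steal_task()
// above, restated as a stand-alone predicate. A candidate may only be run if it
// is a descendant of the thread's current task; this is checked by walking
// td_parent links until we either reach the current task or pass its nesting
// level. Field names follow kmp_taskdata_t; this is a restatement of the
// existing code, not a replacement for it.
#if 0
static int __kmp_obeys_scheduling_constraint( kmp_taskdata_t *candidate,
                                              kmp_taskdata_t *current )
{
    kmp_int32 level = current->td_level;
    kmp_taskdata_t *parent = candidate->td_parent;
    // Climb toward the root, but never past the current task's nesting level.
    while ( parent != current && parent->td_level > level ) {
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT( parent != NULL );
    }
    return parent == current;    // TRUE iff candidate descends from current
}
#endif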
1753
1754
1755//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001756// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001757// is satisfied (return true) or there are none left (return false).
1758// final_spin is TRUE if this is the spin at the release barrier.
1759// thread_finished indicates whether the thread is finished executing all
1760// the tasks it has on its deque, and is at the release barrier.
1761// spinner is the location on which to spin.
1762// spinner == NULL means only execute a single task and return.
1763// checker is the value to check to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001764template <class C>
1765static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1766 int *thread_finished
1767 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001768{
1769 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001770 kmp_thread_data_t * threads_data;
1771 kmp_task_t * task;
1772 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1773 volatile kmp_uint32 * unfinished_threads;
1774 kmp_int32 nthreads, last_stolen, k, tid;
1775
1776 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1777 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1778
1779 task_team = thread -> th.th_task_team;
Jonathan Peyton54127982015-11-04 21:37:48 +00001780 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001781
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001782 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001783 gtid, final_spin, *thread_finished) );
1784
1785 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1786 KMP_DEBUG_ASSERT( threads_data != NULL );
1787
1788 nthreads = task_team -> tt.tt_nproc;
1789 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001790#if OMP_41_ENABLED
1791 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1792#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001793 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001794#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001795 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1796
1797 // Choose tasks from our own work queue.
1798 start:
1799 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1800#if USE_ITT_BUILD && USE_ITT_NOTIFY
1801 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1802 if ( itt_sync_obj == NULL ) {
1803 // we are at fork barrier where we could not get the object reliably
1804 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1805 }
1806 __kmp_itt_task_starting( itt_sync_obj );
1807 }
1808#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1809 __kmp_invoke_task( gtid, task, current_task );
1810#if USE_ITT_BUILD
1811 if ( itt_sync_obj != NULL )
1812 __kmp_itt_task_finished( itt_sync_obj );
1813#endif /* USE_ITT_BUILD */
1814
1815 // If this thread is only partway through the barrier and the condition
1816 // is met, then return now, so that the barrier gather/release pattern can proceed.
1817 // If this thread is in the last spin loop in the barrier, waiting to be
1818        // released, we know that the termination condition will not be satisfied,
1819 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001820 if (flag == NULL || (!final_spin && flag->done_check())) {
1821 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001822 return TRUE;
1823 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001824 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001825 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1826 }
1827
1828 // This thread's work queue is empty. If we are in the final spin loop
1829 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001830#if OMP_41_ENABLED
1831 // The work queue may be empty but there might be proxy tasks still executing
1832 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1833#else
1834 if (final_spin)
1835#endif
1836 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001837 // First, decrement the #unfinished threads, if that has not already
1838 // been done. This decrement might be to the spin location, and
1839 // result in the termination condition being satisfied.
1840 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001841 kmp_uint32 count;
1842
1843 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001844 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001845 gtid, count, task_team) );
1846 *thread_finished = TRUE;
1847 }
1848
1849 // It is now unsafe to reference thread->th.th_team !!!
1850 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1851 // thread to pass through the barrier, where it might reset each thread's
1852 // th.th_team field for the next parallel region.
1853 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001854 if (flag != NULL && flag->done_check()) {
1855 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001856 return TRUE;
1857 }
1858 }
1859
Jonathan Peyton54127982015-11-04 21:37:48 +00001860 if (thread->th.th_task_team == NULL) return FALSE;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001861#if OMP_41_ENABLED
1862 // check if there are other threads to steal from, otherwise go back
1863 if ( nthreads == 1 )
1864 goto start;
1865#endif
1866
Jim Cownie5e8470a2013-09-27 10:38:44 +00001867 // Try to steal from the last place I stole from successfully.
1868 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1869 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1870
1871 if (last_stolen != -1) {
1872 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1873
1874 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1875 thread_finished, is_constrained )) != NULL)
1876 {
1877#if USE_ITT_BUILD && USE_ITT_NOTIFY
1878 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1879 if ( itt_sync_obj == NULL ) {
1880 // we are at fork barrier where we could not get the object reliably
1881 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1882 }
1883 __kmp_itt_task_starting( itt_sync_obj );
1884 }
1885#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1886 __kmp_invoke_task( gtid, task, current_task );
1887#if USE_ITT_BUILD
1888 if ( itt_sync_obj != NULL )
1889 __kmp_itt_task_finished( itt_sync_obj );
1890#endif /* USE_ITT_BUILD */
1891
1892 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001893 if (flag == NULL || (!final_spin && flag->done_check())) {
1894 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001895 gtid) );
1896 return TRUE;
1897 }
1898
Jonathan Peyton54127982015-11-04 21:37:48 +00001899 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1901 // If the execution of the stolen task resulted in more tasks being
1902 // placed on our run queue, then restart the whole process.
1903 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001904 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001905 gtid) );
1906 goto start;
1907 }
1908 }
1909
1910 // Don't give priority to stealing from this thread anymore.
1911 threads_data[ tid ].td.td_deque_last_stolen = -1;
1912
1913        // The victim's work queue is empty.  If we are in the final spin loop
1914 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001915#if OMP_41_ENABLED
1916 // The work queue may be empty but there might be proxy tasks still executing
1917 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1918#else
1919 if (final_spin)
1920#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001921 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922 // First, decrement the #unfinished threads, if that has not already
1923 // been done. This decrement might be to the spin location, and
1924 // result in the termination condition being satisfied.
1925 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001926 kmp_uint32 count;
1927
1928 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001929 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001930 "task_team=%p\n", gtid, count, task_team) );
1931 *thread_finished = TRUE;
1932 }
1933
1934 // If __kmp_tasking_mode != tskm_immediate_exec
1935 // then it is now unsafe to reference thread->th.th_team !!!
1936 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1937 // thread to pass through the barrier, where it might reset each thread's
1938 // th.th_team field for the next parallel region.
1939 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940 if (flag != NULL && flag->done_check()) {
1941 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942 gtid) );
1943 return TRUE;
1944 }
1945 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001946 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001947 }
1948
1949 // Find a different thread to steal work from. Pick a random thread.
1950 // My initial plan was to cycle through all the threads, and only return
1951 // if we tried to steal from every thread, and failed. Arch says that's
1952 // not such a great idea.
1953 // GEH - need yield code in this loop for throughput library mode?
1954 new_victim:
1955 k = __kmp_get_random( thread ) % (nthreads - 1);
1956 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1957 ++k; // Adjusts random distribution to exclude self
1958 }
1959 {
1960 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1961 int first;
1962
1963 // There is a slight chance that __kmp_enable_tasking() did not wake up
1964 // all threads waiting at the barrier. If this thread is sleeping, then
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001965 // wake it up. Since we were going to pay the cache miss penalty
1966 // for referencing another thread's kmp_info_t struct anyway, the check
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967 // shouldn't cost too much performance at this point.
1968 // In extra barrier mode, tasks do not sleep at the separate tasking
1969 // barrier, so this isn't a problem.
1970 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1971 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1972 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1973 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001974 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975            // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001976 // There is a slight possibility that it resumes, steals a task from
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001977 // another thread, which spawns more tasks, all in the time that it takes
Jim Cownie5e8470a2013-09-27 10:38:44 +00001978 // this thread to check => don't write an assertion that the victim's
1979 // queue is empty. Try stealing from a different thread.
1980 goto new_victim;
1981 }
1982
1983 // Now try to steal work from the selected thread
1984 first = TRUE;
1985 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1986 thread_finished, is_constrained )) != NULL)
1987 {
1988#if USE_ITT_BUILD && USE_ITT_NOTIFY
1989 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1990 if ( itt_sync_obj == NULL ) {
1991 // we are at fork barrier where we could not get the object reliably
1992 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1993 }
1994 __kmp_itt_task_starting( itt_sync_obj );
1995 }
1996#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1997 __kmp_invoke_task( gtid, task, current_task );
1998#if USE_ITT_BUILD
1999 if ( itt_sync_obj != NULL )
2000 __kmp_itt_task_finished( itt_sync_obj );
2001#endif /* USE_ITT_BUILD */
2002
2003 // Try stealing from this victim again, in the future.
2004 if (first) {
2005 threads_data[ tid ].td.td_deque_last_stolen = k;
2006 first = FALSE;
2007 }
2008
2009 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002010 if (flag == NULL || (!final_spin && flag->done_check())) {
2011 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012 gtid) );
2013 return TRUE;
2014 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002015 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002016 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
2017
2018 // If the execution of the stolen task resulted in more tasks being
2019 // placed on our run queue, then restart the whole process.
2020 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002021 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022 gtid) );
2023 goto start;
2024 }
2025 }
2026
2027        // The victim's work queue is empty.  If we are in the final spin loop
2028 // of the barrier, check and see if the termination condition is satisfied.
2029 // Going on and finding a new victim to steal from is expensive, as it
2030 // involves a lot of cache misses, so we definitely want to re-check the
2031 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002032#if OMP_41_ENABLED
2033 // The work queue may be empty but there might be proxy tasks still executing
2034 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
2035#else
2036 if (final_spin)
2037#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002038 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002039 // First, decrement the #unfinished threads, if that has not already
2040 // been done. This decrement might be to the spin location, and
2041 // result in the termination condition being satisfied.
2042 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002043 kmp_uint32 count;
2044
2045 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002046 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00002047 "task_team=%p\n",
2048 gtid, count, task_team) );
2049 *thread_finished = TRUE;
2050 }
2051
2052 // If __kmp_tasking_mode != tskm_immediate_exec,
2053 // then it is now unsafe to reference thread->th.th_team !!!
2054 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
2055 // thread to pass through the barrier, where it might reset each thread's
2056 // th.th_team field for the next parallel region.
2057 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002058 if (flag != NULL && flag->done_check()) {
2059 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060 return TRUE;
2061 }
2062 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002063 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064 }
2065
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002066 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002067 return FALSE;
2068}
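
//
// Illustrative sketch (editorial addition): the victim-selection trick used in
// __kmp_execute_tasks_template() above. Drawing k uniformly from
// [0, nthreads-2] and bumping it past our own tid yields a uniformly random
// *other* thread without a retry loop. rand() stands in for the runtime's
// __kmp_get_random().
#if 0
#include <stdlib.h>

static int pick_random_victim(int my_tid, int nthreads)
{
    int k = rand() % (nthreads - 1);   /* nthreads must be > 1 */
    if (k >= my_tid)
        ++k;                           /* shift past self: result != my_tid */
    return k;
}
#endif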
2069
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002070int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
2071 int *thread_finished
2072 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2073{
2074 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2075 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2076}
2077
2078int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2079 int *thread_finished
2080 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2081{
2082 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2083 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2084}
2085
2086int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2087 int *thread_finished
2088 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2089{
2090 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2091 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2092}
2093
2094
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095
2096//-----------------------------------------------------------------------------
2097// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2098// next barrier so they can assist in executing enqueued tasks.
2099// First thread in allocates the task team atomically.
2100
2101static void
2102__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2103{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104 kmp_thread_data_t *threads_data;
2105 int nthreads, i, is_init_thread;
2106
2107 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2108 __kmp_gtid_from_thread( this_thr ) ) );
2109
2110 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002111 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112
2113 nthreads = task_team->tt.tt_nproc;
2114 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002115 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002116
2117 // Allocate or increase the size of threads_data if necessary
2118 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2119
2120 if (!is_init_thread) {
2121 // Some other thread already set up the array.
2122 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2123 __kmp_gtid_from_thread( this_thr ) ) );
2124 return;
2125 }
2126 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2127 KMP_DEBUG_ASSERT( threads_data != NULL );
2128
2129 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2130 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2131 {
2132 // Release any threads sleeping at the barrier, so that they can steal
2133 // tasks and execute them. In extra barrier mode, tasks do not sleep
2134 // at the separate tasking barrier, so this isn't a problem.
2135 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002136 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137 kmp_info_t *thread = threads_data[i].td.td_thr;
2138
2139 if (i == this_thr->th.th_info.ds.ds_tid) {
2140 continue;
2141 }
2142 // Since we haven't locked the thread's suspend mutex lock at this
2143 // point, there is a small window where a thread might be putting
2144 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002145 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002146            // to see if other threads are sleeping (using the same random
2147 // mechanism that is used for task stealing) and awakens them if
2148 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002149 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002150 {
2151 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2152 __kmp_gtid_from_thread( this_thr ),
2153 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002154 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002155 }
2156 else {
2157 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2158 __kmp_gtid_from_thread( this_thr ),
2159 __kmp_gtid_from_thread( thread ) ) );
2160 }
2161 }
2162 }
2163
2164 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2165 __kmp_gtid_from_thread( this_thr ) ) );
2166}
2167
2168
2169/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002170/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002171 * Utility routines for "task teams".  A task team (kmp_task_team_t) is kind of
2172 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2173 * After a child thread checks into a barrier and calls __kmp_release() from
2174 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2175 * longer assume that the kmp_team_t structure is intact (at any moment, the
2176 * master thread may exit the barrier code and free the team data structure,
2177 * and return the threads to the thread pool).
2178 *
2179 * This does not work with the tasking code, as the thread is still
2180 * expected to participate in the execution of any tasks that may have been
2181 * spawned by a member of the team, and the thread still needs access
2182 * to each thread in the team, so that it can steal work from it.
2183 *
2184 * Enter the existence of the kmp_task_team_t struct. It employs a reference
2185 * counting mechanism, and is allocated by the master thread before calling
2186 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2187 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2188 * of the kmp_task_team_t structs for consecutive barriers can overlap
2189 * (and will, unless the master thread is the last thread to exit the barrier
2190 * release phase, which is not typical).
2191 *
2192 * The existence of such a struct is useful outside the context of tasking,
2193 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2194 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2195 * libraries.
2196 *
2197 * We currently use the existence of the threads array as an indicator that
2198 * tasks were spawned since the last barrier. If the structure is to be
2199 * useful outside the context of tasking, then this will have to change, but
2200 * not setting the field minimizes the performance impact of tasking on
2201 * barriers, when no explicit tasks were spawned (pushed, actually).
2202 */
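
//
// Illustrative sketch (editorial addition): the "released by the last thread
// out" lifetime described in the comment above, reduced to a reference-counted
// struct. The real task team counts participants in tt_unfinished_threads and
// recycles storage through a free list rather than calling free(); only the
// ownership rule is the same. GCC/Clang __sync builtins stand in for the
// runtime's atomics.
#if 0
#include <stdlib.h>

typedef struct shared_state {
    volatile int refcount;       /* one reference per participating thread */
    /* ... per-barrier tasking state would live here ... */
} shared_state_t;

static shared_state_t *state_acquire(shared_state_t *s)
{
    __sync_fetch_and_add(&s->refcount, 1);
    return s;
}

static void state_release(shared_state_t *s)
{
    if (__sync_sub_and_fetch(&s->refcount, 1) == 0)
        free(s);                 /* the last thread out reclaims the storage */
}
#endif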
2203
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002204
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2206// Lock for task team data structures
2207static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2208
2209
2210//------------------------------------------------------------------------------
2211// __kmp_alloc_task_deque:
2212// Allocates a task deque for a particular thread, and initializes the necessary
2213// data structures relating to the deque. This only happens once per thread
2214// per task team since task teams are recycled.
2215// No lock is needed during allocation since each thread allocates its own
2216// deque.
2217
2218static void
2219__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2220{
2221 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2222 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2223
2224 // Initialize last stolen task field to "none"
2225 thread_data -> td.td_deque_last_stolen = -1;
2226
2227 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2228 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2229 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2230
2231 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2232 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2233 // Allocate space for task deque, and zero the deque
2234 // Cannot use __kmp_thread_calloc() because threads not around for
2235 // kmp_reap_task_team( ).
2236 thread_data -> td.td_deque = (kmp_taskdata_t **)
2237 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2238}
2239
2240
2241//------------------------------------------------------------------------------
2242// __kmp_free_task_deque:
2243// Deallocates a task deque for a particular thread.
2244// Happens at library deallocation so don't need to reset all thread data fields.
2245
2246static void
2247__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2248{
2249 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2250
2251 if ( thread_data -> td.td_deque != NULL ) {
2252 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2253 __kmp_free( thread_data -> td.td_deque );
2254 thread_data -> td.td_deque = NULL;
2255 }
2256 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2257
2258#ifdef BUILD_TIED_TASK_STACK
2259 // GEH: Figure out what to do here for td_susp_tied_tasks
2260 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2261 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2262 }
2263#endif // BUILD_TIED_TASK_STACK
2264}
2265
2266
2267//------------------------------------------------------------------------------
2268// __kmp_realloc_task_threads_data:
2269// Allocates a threads_data array for a task team, either by allocating an initial
2270// array or enlarging an existing array. Only the first thread to get the lock
2271// allocates or enlarges the array and re-initializes the array elements.
2272// That thread returns "TRUE", the rest return "FALSE".
2273// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2274// The current size is given by task_team -> tt.tt_max_threads.
2275
2276static int
2277__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2278{
2279 kmp_thread_data_t ** threads_data_p;
2280 kmp_int32 nthreads, maxthreads;
2281 int is_init_thread = FALSE;
2282
2283 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2284 // Already reallocated and initialized.
2285 return FALSE;
2286 }
2287
2288 threads_data_p = & task_team -> tt.tt_threads_data;
2289 nthreads = task_team -> tt.tt_nproc;
2290 maxthreads = task_team -> tt.tt_max_threads;
2291
2292 // All threads must lock when they encounter the first task of the implicit task
2293    // region to make sure threads_data fields are (re)initialized before they are used.
2294 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2295
2296 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2297 // first thread to enable tasking
2298 kmp_team_t *team = thread -> th.th_team;
2299 int i;
2300
2301 is_init_thread = TRUE;
2302 if ( maxthreads < nthreads ) {
2303
2304 if ( *threads_data_p != NULL ) {
2305 kmp_thread_data_t *old_data = *threads_data_p;
2306 kmp_thread_data_t *new_data = NULL;
2307
2308 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2309 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2310 __kmp_gtid_from_thread( thread ), task_team,
2311 nthreads, maxthreads ) );
2312 // Reallocate threads_data to have more elements than current array
2313 // Cannot use __kmp_thread_realloc() because threads not around for
2314 // kmp_reap_task_team( ). Note all new array entries are initialized
2315 // to zero by __kmp_allocate().
2316 new_data = (kmp_thread_data_t *)
2317 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2318 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002319 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002320 (void *) old_data,
2321                   maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002322
2323#ifdef BUILD_TIED_TASK_STACK
2324 // GEH: Figure out if this is the right thing to do
2325 for (i = maxthreads; i < nthreads; i++) {
2326 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2327 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2328 }
2329#endif // BUILD_TIED_TASK_STACK
2330 // Install the new data and free the old data
2331 (*threads_data_p) = new_data;
2332 __kmp_free( old_data );
2333 }
2334 else {
2335 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2336 "threads data for task_team %p, size = %d\n",
2337 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2338 // Make the initial allocate for threads_data array, and zero entries
2339 // Cannot use __kmp_thread_calloc() because threads not around for
2340 // kmp_reap_task_team( ).
2341 *threads_data_p = (kmp_thread_data_t *)
2342 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2343#ifdef BUILD_TIED_TASK_STACK
2344 // GEH: Figure out if this is the right thing to do
2345 for (i = 0; i < nthreads; i++) {
2346 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2347 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2348 }
2349#endif // BUILD_TIED_TASK_STACK
2350 }
2351 task_team -> tt.tt_max_threads = nthreads;
2352 }
2353 else {
2354 // If array has (more than) enough elements, go ahead and use it
2355 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2356 }
2357
2358 // initialize threads_data pointers back to thread_info structures
2359 for (i = 0; i < nthreads; i++) {
2360 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2361 thread_data -> td.td_thr = team -> t.t_threads[i];
2362
2363 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2364 // The last stolen field survives across teams / barrier, and the number
2365 // of threads may have changed. It's possible (likely?) that a new
2366 // parallel region will exhibit the same behavior as the previous region.
2367 thread_data -> td.td_deque_last_stolen = -1;
2368 }
2369 }
2370
2371 KMP_MB();
2372 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2373 }
2374
2375 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2376 return is_init_thread;
2377}
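
//
// Illustrative sketch (editorial addition): the "first thread in initializes,
// everyone else just checks" pattern used by the function above, reduced to a
// double-checked flag around a one-time setup. Plain pthreads and a volatile
// flag stand in for the runtime's bootstrap lock and TCR/TCW/KMP_MB
// primitives; a from-scratch implementation would use C11 atomics instead.
#if 0
#include <pthread.h>

static pthread_mutex_t init_lock   = PTHREAD_MUTEX_INITIALIZER;
static volatile int    initialized = 0;

/* Returns 1 for the thread that performed the setup, 0 for all others. */
static int ensure_initialized(void (*do_setup)(void))
{
    int did_init = 0;
    if (initialized)                  /* fast path: work already done */
        return 0;
    pthread_mutex_lock(&init_lock);
    if (!initialized) {               /* re-check while holding the lock */
        do_setup();
        initialized = 1;              /* publish only after setup completes */
        did_init = 1;
    }
    pthread_mutex_unlock(&init_lock);
    return did_init;
}
#endif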
2378
2379
2380//------------------------------------------------------------------------------
2381// __kmp_free_task_threads_data:
2382// Deallocates a threads_data array for a task team, including any attached
2383// tasking deques. Only occurs at library shutdown.
2384
2385static void
2386__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2387{
2388 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2389 if ( task_team -> tt.tt_threads_data != NULL ) {
2390 int i;
2391 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2392 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2393 }
2394 __kmp_free( task_team -> tt.tt_threads_data );
2395 task_team -> tt.tt_threads_data = NULL;
2396 }
2397 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2398}
2399
2400
2401//------------------------------------------------------------------------------
2402// __kmp_allocate_task_team:
2403// Allocates a task team associated with a specific team, taking it from
2404// the global task team free list if possible. Also initializes data structures.
2405
2406static kmp_task_team_t *
2407__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2408{
2409 kmp_task_team_t *task_team = NULL;
2410 int nthreads;
2411
2412 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2413 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2414
2415 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2416 // Take a task team from the task team pool
2417 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2418 if (__kmp_free_task_teams != NULL) {
2419 task_team = __kmp_free_task_teams;
2420 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2421 task_team -> tt.tt_next = NULL;
2422 }
2423 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2424 }
2425
2426 if (task_team == NULL) {
2427 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2428 "task team for team %p\n",
2429 __kmp_gtid_from_thread( thread ), team ) );
2430 // Allocate a new task team if one is not available.
2431 // Cannot use __kmp_thread_malloc() because threads not around for
2432 // kmp_reap_task_team( ).
2433 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2434 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2435 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2436 //task_team -> tt.tt_max_threads = 0;
2437 //task_team -> tt.tt_next = NULL;
2438 }
2439
2440 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002441#if OMP_41_ENABLED
2442 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2443#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002444 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2445
Jim Cownie5e8470a2013-09-27 10:38:44 +00002446 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2447 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002448
Jonathan Peyton54127982015-11-04 21:37:48 +00002449 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2450 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002451 return task_team;
2452}
2453
2454
2455//------------------------------------------------------------------------------
2456// __kmp_free_task_team:
2457// Frees the task team associated with a specific thread, and adds it
2458// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002459
Jonathan Peyton54127982015-11-04 21:37:48 +00002460void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002461__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2462{
2463 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2464 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2465
Jim Cownie5e8470a2013-09-27 10:38:44 +00002466 // Put task team back on free list
2467 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2468
2469 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2470 task_team -> tt.tt_next = __kmp_free_task_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002471 TCW_PTR(__kmp_free_task_teams, task_team);
2472
2473 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2474}
2475
2476
2477//------------------------------------------------------------------------------
2478// __kmp_reap_task_teams:
2479// Free all the task teams on the task team free list.
2480// Should only be done during library shutdown.
2481// Cannot do anything that needs a thread structure or gtid since they are already gone.
2482
2483void
2484__kmp_reap_task_teams( void )
2485{
2486 kmp_task_team_t *task_team;
2487
2488 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2489 // Free all task_teams on the free list
2490 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2491 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2492 __kmp_free_task_teams = task_team -> tt.tt_next;
2493 task_team -> tt.tt_next = NULL;
2494
2495 // Free threads_data if necessary
2496 if ( task_team -> tt.tt_threads_data != NULL ) {
2497 __kmp_free_task_threads_data( task_team );
2498 }
2499 __kmp_free( task_team );
2500 }
2501 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2502 }
2503}
2504
Jim Cownie5e8470a2013-09-27 10:38:44 +00002505//------------------------------------------------------------------------------
2506// __kmp_wait_to_unref_task_teams:
2507// Some threads could still be in the fork barrier release code, possibly
2508// trying to steal tasks. Wait for each thread to unreference its task team.
2509//
2510void
2511__kmp_wait_to_unref_task_teams(void)
2512{
2513 kmp_info_t *thread;
2514 kmp_uint32 spins;
2515 int done;
2516
2517 KMP_INIT_YIELD( spins );
2518
Jim Cownie5e8470a2013-09-27 10:38:44 +00002519 for (;;) {
2520 done = TRUE;
2521
2522        // TODO: GEH - this may be wrong because some sync would be necessary
2523 // in case threads are added to the pool during the traversal.
2524 // Need to verify that lock for thread pool is held when calling
2525 // this routine.
2526 for (thread = (kmp_info_t *)__kmp_thread_pool;
2527 thread != NULL;
2528 thread = thread->th.th_next_pool)
2529 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002530#if KMP_OS_WINDOWS
2531 DWORD exit_val;
2532#endif
2533 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2534 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2535 __kmp_gtid_from_thread( thread ) ) );
2536 continue;
2537 }
2538#if KMP_OS_WINDOWS
2539 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2540 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002541 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002542 continue;
2543 }
2544#endif
2545
2546 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2547
2548 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2549 __kmp_gtid_from_thread( thread ) ) );
2550
2551 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002552 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002553 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002554 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002555 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2556 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002557 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002558 }
2559 }
2560 }
2561 if (done) {
2562 break;
2563 }
2564
2565 // If we are oversubscribed,
2566 // or have waited a bit (and library mode is throughput), yield.
2567 // Pause is in the following code.
2568 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2569 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2570 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002571}
2572
2573
2574//------------------------------------------------------------------------------
2575// __kmp_task_team_setup: Create a task_team for the current team, but reuse
2576// an already created, currently unused one if it exists.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002577void
Jonathan Peyton54127982015-11-04 21:37:48 +00002578__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002579{
2580 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2581
Jonathan Peyton54127982015-11-04 21:37:48 +00002582 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2583 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
2584 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002585 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002586 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002587 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002588 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002589 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002590
2591 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
2592 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2593 // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
2594 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
2595 // serialized teams.
Jonathan Peytone1dad192015-11-30 20:05:13 +00002596 if (team->t.t_nproc > 1) {
2597 int other_team = 1 - this_thr->th.th_task_state;
2598 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2599 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2600 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2601 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2602 ((team != NULL) ? team->t.t_id : -1), other_team ));
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002603 }
Jonathan Peytone1dad192015-11-30 20:05:13 +00002604 else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2605 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2606 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2607 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2608 TCW_4(task_team->tt.tt_found_tasks, FALSE);
2609#if OMP_41_ENABLED
2610 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2611#endif
2612 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2613 TCW_4(task_team->tt.tt_active, TRUE );
2614 }
2615 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2616 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2617 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2618 ((team != NULL) ? team->t.t_id : -1), other_team ));
2619 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002620 }
2621}
2622
2623
2624//------------------------------------------------------------------------------
2625// __kmp_task_team_sync: Propagation of task team data from team to threads
2626// which happens just after the release phase of a team barrier. This may be
2627// called by any thread, but only for teams with # threads > 1.
2628
2629void
2630__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2631{
2632 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2633
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002634 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002635 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002636 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2637 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jonathan Peyton54127982015-11-04 21:37:48 +00002638 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002639 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2640 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002641}
2642
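// Illustrative timeline (a sketch, not normative) of the two-slot task team scheme driven by
// __kmp_task_team_setup and __kmp_task_team_sync, for a thread whose th_task_state starts at 0:
//
//   region N   : thread executes tasks from team->t.t_task_team[0]
//   barrier    : master prepares/recycles t_task_team[1]; after the release phase,
//                __kmp_task_team_sync toggles th_task_state to 1 and the thread switches
//                to t_task_team[1]
//   region N+1 : t_task_team[0] is left untouched so threads still spinning on it stay safe;
//                it is reset at the next barrier for use in region N+2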
2643
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002644//--------------------------------------------------------------------------------------------
2645// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
Jonathan Peyton54127982015-11-04 21:37:48 +00002646// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2647// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2648// optionally as the last argument. When wait is zero, master thread does not wait for
2649// unfinished_threads to reach 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002650void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002651__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002652 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jonathan Peyton54127982015-11-04 21:37:48 +00002653 , int wait)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002654{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002655 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002656
2657 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2658 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2659
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002660 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002661 if (wait) {
2662 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2663 __kmp_gtid_from_thread(this_thr), task_team));
2664 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2665 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2666 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2667 flag.wait(this_thr, TRUE
2668 USE_ITT_BUILD_ARG(itt_sync_obj));
2669 }
2670 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2671 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2672 "setting active to false, setting local and team's pointer to NULL\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002673 __kmp_gtid_from_thread(this_thr), task_team));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002674#if OMP_41_ENABLED
2675 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2676 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2677#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002678 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002679#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2681 KMP_MB();
2682
2683 TCW_PTR(this_thr->th.th_task_team, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684 }
2685}
2686
2687
2688//------------------------------------------------------------------------------
2689// __kmp_tasking_barrier:
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002690// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691// Internal function to execute all tasks prior to a regular barrier or a
2692// join barrier. It is a full barrier itself, which unfortunately turns
2693// regular barriers into double barriers and join barriers into 1 1/2
2694// barriers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002695void
2696__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2697{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002698 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002699 int flag = FALSE;
2700 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2701
2702#if USE_ITT_BUILD
2703 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2704#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002705 kmp_flag_32 spin_flag(spin, 0U);
2706 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2707 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002708#if USE_ITT_BUILD
2709 // TODO: What about itt_sync_obj??
2710 KMP_FSYNC_SPIN_PREPARE( spin );
2711#endif /* USE_ITT_BUILD */
2712
2713 if( TCR_4(__kmp_global.g.g_done) ) {
2714 if( __kmp_global.g.g_abort )
2715 __kmp_abort_thread( );
2716 break;
2717 }
2718 KMP_YIELD( TRUE ); // GH: We always yield here
2719 }
2720#if USE_ITT_BUILD
2721 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2722#endif /* USE_ITT_BUILD */
2723}
2724
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002725
2726#if OMP_41_ENABLED
2727
2728/* __kmp_give_task puts a task into a given thread queue if:
Jonathan Peytonff684e42016-02-11 22:58:29 +00002729 - the queue for that thread was created
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002730 - there's space in that queue
2731
2732 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2733 */
2734static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2735{
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002736 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002737 kmp_task_team_t * task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002738
2739 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2740
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002741 // If task_team is NULL something went really bad...
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002742 KMP_DEBUG_ASSERT( task_team != NULL );
2743
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002744 bool result = false;
2745 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2746
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002747 if (thread_data -> td.td_deque == NULL ) {
2748 // There's no queue in this thread, go find another one
2749 // We're guaranteed that at least one thread has a queue
2750 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2751 return result;
2752 }
2753
2754 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2755 {
2756 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2757 return result;
2758 }
2759
2760 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2761
2762 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2763 {
2764 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2765 goto release_and_exit;
2766 }
2767
2768 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2769 // Wrap index.
2770 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2771 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2772
2773 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002774 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002775
2776release_and_exit:
2777 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2778
2779 return result;
2780}
2781
2782
2783/* The finish of a proxy task is divided in two pieces:
2784    - the top half is the one that can be done from a thread outside the team
2785    - the bottom half must be run from a thread within the team
2786
2787 In order to run the bottom half the task gets queued back into one of the threads of the team.
2788   Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2789 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
2790 - things that can be run before queuing the bottom half
2791 - things that must be run after queuing the bottom half
2792
2793 This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
2794   we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2795*/
2796
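/* A rough sketch (illustrative only) of the ordering this protocol produces when the releasing
   thread is outside the team; "guard" below is the proxy task's own td_incomplete_child_tasks:

     releasing thread                              team thread that dequeues the bottom half
     ----------------                              -----------------------------------------
     first top half : mark complete, update
                      taskgroup count, guard = 1
     __kmp_give_task(): enqueue bottom half   -->  bottom half: spin while guard > 0
     second top half: decrement parent's
                      incomplete children,
                      guard = 0                -->  spin ends; __kmp_release_deps() and
                                                    __kmp_free_task_and_ancestors() run safely
*/
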
2797static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2798{
2799 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2800 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2801 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2802 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2803
2804 taskdata -> td_flags.complete = 1; // mark the task as completed
2805
2806 if ( taskdata->td_taskgroup )
2807 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2808
2809    // Create an imaginary child for this task so that the bottom half cannot release the task before we have completed the second top half
2810 TCR_4(taskdata->td_incomplete_child_tasks++);
2811}
2812
2813static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2814{
2815 kmp_int32 children = 0;
2816
2817 // Predecrement simulated by "- 1" calculation
2818 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2819 KMP_DEBUG_ASSERT( children >= 0 );
2820
2821    // Remove the imaginary child
2822 TCR_4(taskdata->td_incomplete_child_tasks--);
2823}
2824
2825static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2826{
2827 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2828 kmp_info_t * thread = __kmp_threads[ gtid ];
2829
2830 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2831 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2832
2833 // We need to wait to make sure the top half is finished
2834 // Spinning here should be ok as this should happen quickly
2835 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2836
2837 __kmp_release_deps(gtid,taskdata);
2838 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2839}
2840
2841/*!
2842@ingroup TASKING
2843@param gtid Global Thread ID of encountering thread
2844@param ptask Task whose execution is completed
2845
2846Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
2847*/
2848void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2849{
2850 KMP_DEBUG_ASSERT( ptask != NULL );
2851 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2852 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2853
2854 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2855
2856 __kmp_first_top_half_finish_proxy(taskdata);
2857 __kmp_second_top_half_finish_proxy(taskdata);
2858 __kmp_bottom_half_finish_proxy(gtid,ptask);
2859
2860 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2861}
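
// A minimal usage sketch (not compiled into the library): how a completion hook running on a
// thread that belongs to the encountering team might report a finished proxy task. The hook name
// and the way the kmp_task_t pointer is captured are assumptions for illustration only;
// __kmpc_global_thread_num() is the usual entry point for obtaining the caller's gtid.
#if 0
static void example_proxy_done_on_team_thread( kmp_task_t *ptask )
{
    kmp_int32 gtid = __kmpc_global_thread_num( NULL ); // gtid of the calling team thread
    __kmpc_proxy_task_completed( gtid, ptask );        // run the top and bottom halves in place
}
#endif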
2862
2863/*!
2864@ingroup TASKING
2865@param ptask Task whose execution is completed
2866
2867Execute the completion of a proxy task from a thread that may not belong to the team.
2868*/
2869void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2870{
2871 KMP_DEBUG_ASSERT( ptask != NULL );
2872 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2873
2874 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2875
2876 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2877
2878 __kmp_first_top_half_finish_proxy(taskdata);
2879
Jonathan Peytonff684e42016-02-11 22:58:29 +00002880    // Enqueue the task so that the bottom half of its completion runs on a thread of the corresponding team
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002881 kmp_team_t * team = taskdata->td_team;
2882 kmp_int32 nthreads = team->t.t_nproc;
2883 kmp_info_t *thread;
2884 kmp_int32 k = 0;
2885
2886 do {
Jonathan Peyton1406f012015-05-22 22:35:51 +00002887        // This should be similar to k = __kmp_get_random( thread ) % nthreads, but we cannot use __kmp_get_random here
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002888        // For now we're just linearly trying to find a thread
2889 k = (k+1) % nthreads;
2890 thread = team->t.t_threads[k];
2891 } while ( !__kmp_give_task( thread, k, ptask ) );
2892
2893 __kmp_second_top_half_finish_proxy(taskdata);
2894
2895 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2896}
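
// A minimal usage sketch (not compiled): a hypothetical asynchronous engine's callback thread,
// which is not part of the OpenMP team, reporting completion of a proxy task. Only the kmp_task_t
// pointer is needed; the bottom half is handed back to a team thread internally. The callback and
// payload type below are illustrative assumptions, not part of this runtime.
#if 0
typedef struct example_async_payload { kmp_task_t *ptask; } example_async_payload_t;

static void example_async_done_callback( void *arg )
{
    example_async_payload_t *p = (example_async_payload_t *)arg;
    __kmpc_proxy_task_completed_ooo( p->ptask ); // safe from any thread; no gtid required
}
#endif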
2897
Jonathan Peyton283a2152016-03-02 22:47:51 +00002898//---------------------------------------------------------------------------------
2899// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2900//
2901// thread: allocating thread
2902// task_src: pointer to source task to be duplicated
2903// returns: a pointer to the allocated kmp_task_t structure (task).
2904kmp_task_t *
2905__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2906{
2907 kmp_task_t *task;
2908 kmp_taskdata_t *taskdata;
2909 kmp_taskdata_t *taskdata_src;
2910 kmp_taskdata_t *parent_task = thread->th.th_current_task;
2911 size_t shareds_offset;
2912 size_t task_size;
2913
2914 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2915 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2916 KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
2917 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2918 task_size = taskdata_src->td_size_alloc;
2919
2920 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
2921 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2922 #if USE_FAST_MEMORY
2923 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2924 #else
2925 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2926 #endif /* USE_FAST_MEMORY */
2927 KMP_MEMCPY(taskdata, taskdata_src, task_size);
2928
2929 task = KMP_TASKDATA_TO_TASK(taskdata);
2930
2931    // Initialize new task (only the fields whose values must differ from the source task copied above)
2932 taskdata->td_task_id = KMP_GEN_TASK_ID();
2933    if( task->shareds != NULL ) { // need to set up the shareds pointer
2934 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2935 task->shareds = &((char*)taskdata)[shareds_offset];
2936 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2937 }
2938 taskdata->td_alloc_thread = thread;
2939 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2940
2941 // Only need to keep track of child task counts if team parallel and tasking not serialized
2942 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2943 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2944 if ( parent_task->td_taskgroup )
2945 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
2946 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
2947 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2948 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2949 }
2950
2951 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2952 thread, taskdata, taskdata->td_parent) );
2953#if OMPT_SUPPORT
2954 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2955#endif
2956 return task;
2957}
2958
2959// Routine optionally generated by the compiler for setting the lastprivate flag
2960// and calling needed constructors for private/firstprivate objects
2961// (used to form taskloop tasks from pattern task)
2962typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
2963
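// A hedged sketch (illustrative only) of what a compiler-generated duplication routine matching
// p_task_dup_t might look like. The private-data layout behind kmp_task_t is compiler-specific;
// the struct, field names, and the "privates follow kmp_task_t" placement below are invented
// purely for illustration.
#if 0
typedef struct example_taskloop_privates {
    kmp_int32 last_flag;  // lastprivate marker consulted at the end of the chunk
    double    fp_copy;    // a firstprivate value that must be (re)initialized per task
} example_taskloop_privates_t;

static void example_task_dup( kmp_task_t *task_dst, kmp_task_t *task_src, kmp_int32 lastpriv )
{
    // assumed layout (illustration only): privates placed immediately after kmp_task_t
    example_taskloop_privates_t *dst = (example_taskloop_privates_t *)(task_dst + 1);
    example_taskloop_privates_t *src = (example_taskloop_privates_t *)(task_src + 1);
    dst->fp_copy   = src->fp_copy;  // copy-construct firstprivates for the new chunk task
    dst->last_flag = lastpriv;      // record whether this task executes the last iteration
}
#endif
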
2964//---------------------------------------------------------------------------------
2965// __kmp_taskloop_linear: Start tasks of the taskloop linearly
2966//
2967// loc Source location information
2968// gtid Global thread ID
2969// task Task with whole loop iteration range
2970// lb Pointer to loop lower bound
2971// ub Pointer to loop upper bound
2972// st Loop stride
2973// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
2974// grainsize Schedule value if specified
2975// task_dup Tasks duplication routine
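// Worked example of the chunking below (a sketch of the arithmetic, not an extra code path):
// tc = 10 iterations with grainsize = 3 (sched == 1) gives num_tasks = 10/3 = 3, grainsize
// readjusted to 10/3 = 3, and extras = 10%3 = 1, i.e. chunks of 4, 3 and 3 iterations
// (the first 'extras' tasks each take one extra iteration).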
2976void
2977__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
2978 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
2979 int sched, kmp_uint64 grainsize, void *task_dup )
2980{
2981 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
2982 kmp_uint64 tc;
2983 kmp_uint64 lower = *lb; // compiler provides global bounds here
2984 kmp_uint64 upper = *ub;
Samuel Antao11e4c532016-03-12 00:55:17 +00002985 kmp_uint64 i, num_tasks = 0, extras = 0;
Jonathan Peyton283a2152016-03-02 22:47:51 +00002986 kmp_info_t *thread = __kmp_threads[gtid];
2987 kmp_taskdata_t *current_task = thread->th.th_current_task;
2988 kmp_task_t *next_task;
2989 kmp_int32 lastpriv = 0;
2990 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
2991 size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
2992
2993 // compute trip count
2994 if ( st == 1 ) { // most common case
2995 tc = upper - lower + 1;
2996 } else if ( st < 0 ) {
2997 tc = (lower - upper) / (-st) + 1;
2998 } else { // st > 0
2999 tc = (upper - lower) / st + 1;
3000 }
3001 if(tc == 0) {
3002 // free the pattern task and exit
3003 __kmp_task_start( gtid, task, current_task );
3004 // do not execute anything for zero-trip loop
3005 __kmp_task_finish( gtid, task, current_task );
3006 return;
3007 }
3008
3009 // compute num_tasks/grainsize based on the input provided
3010 switch( sched ) {
3011 case 0: // no schedule clause specified, we can choose the default
3012 // let's try to schedule (team_size*10) tasks
3013 grainsize = thread->th.th_team_nproc * 10;
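        // no break: fall through to the num_tasks case and treat the value above as num_tasks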
3014 case 2: // num_tasks provided
3015 if( grainsize > tc ) {
3016 num_tasks = tc; // too big num_tasks requested, adjust values
3017 grainsize = 1;
3018 extras = 0;
3019 } else {
3020 num_tasks = grainsize;
3021 grainsize = tc / num_tasks;
3022 extras = tc % num_tasks;
3023 }
3024 break;
3025 case 1: // grainsize provided
3026 if( grainsize > tc ) {
3027 num_tasks = 1; // too big grainsize requested, adjust values
3028 grainsize = tc;
3029 extras = 0;
3030 } else {
3031 num_tasks = tc / grainsize;
3032 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3033 extras = tc % num_tasks;
3034 }
3035 break;
3036 default:
3037 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3038 }
3039 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3040 KMP_DEBUG_ASSERT(num_tasks > extras);
3041 KMP_DEBUG_ASSERT(num_tasks > 0);
3042
3043 // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3044 for( i = 0; i < num_tasks; ++i ) {
3045 kmp_uint64 chunk_minus_1;
3046 if( extras == 0 ) {
3047 chunk_minus_1 = grainsize - 1;
3048 } else {
3049 chunk_minus_1 = grainsize;
3050            --extras; // the first 'extras' tasks each get a bigger chunk (grainsize+1 iterations)
3051 }
3052 upper = lower + st * chunk_minus_1;
3053 if( i == num_tasks - 1 ) {
3054 // schedule the last task, set lastprivate flag
3055 lastpriv = 1;
3056#if KMP_DEBUG
3057 if( st == 1 )
3058 KMP_DEBUG_ASSERT(upper == *ub);
3059 else if( st > 0 )
3060 KMP_DEBUG_ASSERT(upper+st > *ub);
3061 else
3062 KMP_DEBUG_ASSERT(upper+st < *ub);
3063#endif
3064 }
3065 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3066 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3067 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3068 if( ptask_dup != NULL )
3069            ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3070 __kmp_omp_task(gtid, next_task, true); // schedule new task
3071 lower = upper + st; // adjust lower bound for the next iteration
3072 }
3073 // free the pattern task and exit
3074 __kmp_task_start( gtid, task, current_task );
3075 // do not execute the pattern task, just do bookkeeping
3076 __kmp_task_finish( gtid, task, current_task );
3077}
3078
3079/*!
3080@ingroup TASKING
3081@param loc Source location information
3082@param gtid Global thread ID
3083@param task Task structure
3084@param if_val Value of the if clause
3085@param lb Pointer to loop lower bound
3086@param ub Pointer to loop upper bound
3087@param st Loop stride
3088@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3089@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3090@param grainsize Schedule value if specified
3091@param task_dup Tasks duplication routine
3092
3093Execute the taskloop construct.
3094*/
3095void
3096__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3097 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3098 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3099{
3100 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3101 KMP_DEBUG_ASSERT( task != NULL );
3102
3103 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3104 gtid, taskdata, *lb, *ub, st, grainsize, sched));
3105
3106    // check the value of the if clause first
3107 if( if_val == 0 ) { // if(0) specified, mark task as serial
3108 taskdata->td_flags.task_serial = 1;
3109 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3110 }
3111 if( nogroup == 0 ) {
3112 __kmpc_taskgroup( loc, gtid );
3113 }
3114
3115 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3116 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3117 }
3118
3119 if( nogroup == 0 ) {
3120 __kmpc_end_taskgroup( loc, gtid );
3121 }
3122 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3123}
3124
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003125#endif