/*
 * kmp_tasking.c -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_41_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    if (!flag) return;
    // Attempt to wake up a thread: examine its type and call appropriate template
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )  // parentheses needed: == binds tighter than &
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack  -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            // free the block, if not the first; use the thread that owns this stack
            __kmp_thread_free( __kmp_thread_from_gtid( gtid ), stack_block );
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )  // parentheses needed: == binds tighter than &
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block -> sb_prev   = stack_block;
            new_block -> sb_next   = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )  // parentheses needed: == binds tighter than &
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
// __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_41_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );

    return TASK_SUCCESSFULLY_PUSHED;
}
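
// Illustrative sketch (not part of the runtime build): the deque used above is a
// power-of-two circular buffer, so wrapping the tail index is a single mask
// operation. The constants below are hypothetical stand-ins for
// TASK_DEQUE_SIZE / TASK_DEQUE_MASK.
//
//     enum { EXAMPLE_DEQUE_SIZE = 256, EXAMPLE_DEQUE_MASK = EXAMPLE_DEQUE_SIZE - 1 };
//     unsigned tail = 255;                            // last slot in the buffer
//     tail = ( tail + 1 ) & EXAMPLE_DEQUE_MASK;       // wraps to 0 instead of 256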


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    kmp_int32 children = 0;
    kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    if ( !team_or_tasking_serialized ) {
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task or if tasking serialized
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        if ( !team_or_tasking_serialized ) {
            // Predecrement simulated by "- 1" calculation
            children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
            KMP_DEBUG_ASSERT( children >= 0 );
        }
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;   // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called.
    // Otherwise, if a task is executed immediately from the release_deps code,
    // the flag will be reset to 1 again by this same function
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
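
// Illustrative sketch (not part of the runtime): for a task whose if-clause is
// false, the compiler typically emits the allocate / begin_if0 / inline body /
// complete_if0 sequence below instead of calling __kmpc_omp_task. The routine
// name example_task_entry and the loc/gtid values are hypothetical.
//
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /*flags: tied*/ 1,
//                                            sizeof(kmp_task_t), 0, &example_task_entry );
//     __kmpc_omp_task_begin_if0( &loc, gtid, t );    // mark serialized task as started
//     example_task_entry( gtid, t );                 // task body runs inline
//     __kmpc_omp_task_complete_if0( &loc, gtid, t ); // finish and free the task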

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task. This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref:  reference to source location of parallel region
// this_thr:  thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//  task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness    = TASK_TIED;
    task->td_flags.tasktype    = TASK_IMPLICIT;
#if OMP_41_ENABLED
    task->td_flags.proxy       = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started     = 1;
    task->td_flags.executing   = 1;
    task->td_flags.complete    = 0;
    task->td_flags.freed       = 0;

#if OMP_40_ENABLED
    task->td_dephash = NULL;
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // An implicit task does not have taskgroup
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

// Round up a size to a multiple of val, where val is a power of two.
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val

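// Illustrative sketch (not part of the runtime): with val = sizeof(void *) = 8,
//     __kmp_round_up_to_val( 61, 8 )  ->  61 & ~7 = 56, then 56 + 8 = 64
//     __kmp_round_up_to_val( 64, 8 )  ->  64  (already a multiple of 8, unchanged)
// This is how __kmp_task_alloc below pads the kmp_task_t block so the trailing
// shareds pointers stay pointer-aligned.
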

//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;      // AC: Always start with 0 part id

    taskdata->td_task_id      = KMP_GEN_TASK_ID();
    taskdata->td_team         = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent       = parent_task;
    taskdata->td_level        = parent_task->td_level + 1; // increment nesting level
    taskdata->td_ident        = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_41_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_41_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
    taskdata->td_task_team         = thread->th.th_task_team;
    taskdata->td_size_alloc        = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started     = 0;
    taskdata->td_flags.executing   = 0;
    taskdata->td_flags.complete    = 0;
    taskdata->td_flags.freed       = 0;

    taskdata->td_flags.native      = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1; // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_41_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}
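
// Illustrative layout sketch (not part of the runtime): the single allocation made
// by __kmp_task_alloc holds, in order, the taskdata header, the compiler-sized
// kmp_task_t (plus privates), alignment padding, and the shareds pointer block:
//
//     | kmp_taskdata_t | kmp_task_t + privates | pad | shareds (sizeof_shareds) |
//     ^ taskdata         ^ task = KMP_TASKDATA_TO_TASK(taskdata)
//                                                     ^ taskdata + shareds_offset
//
// The pad comes from __kmp_round_up_to_val( sizeof(kmp_taskdata_t) + sizeof_kmp_task_t,
// sizeof(void *) ), so task->shareds is always pointer-aligned.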


kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_41_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}

//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_41_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );

        return;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
    __kmp_task_start( gtid, task, current_task );

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
        KMP_COUNT_BLOCK(TASK_executed);
        KMP_TIME_BLOCK (OMP_task);
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're about to run this task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                current_task->ompt_task_info.task_id,
                taskdata->ompt_task_info.task_id);
        }
#endif

#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're returning to the callee task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                taskdata->ompt_task_info.task_id,
                current_task->ompt_task_info.task_id);
        }
#endif

#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED


#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
    }
#endif

#if OMP_41_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY )
#endif
        __kmp_task_finish( gtid, task, current_task );

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}

//-----------------------------------------------------------------------
// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
//
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
// Returns:
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.

kmp_int32
__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    /* Should we execute the new task or queue it?   For now, let's just always try to
       queue it.  If the queue fills up, then we'll execute it.  */

    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
    {                                                           // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        new_taskdata->td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

    KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                  new_taskdata ) );

    return TASK_CURRENT_NOT_QUEUED;
}

//---------------------------------------------------------------------
// __kmp_omp_task: Schedule a non-thread-switchable task for execution
// gtid: Global Thread ID of encountering thread
// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
// returns:
//
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
kmp_int32
__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
            __builtin_frame_address(0);
    }
#endif

    /* Should we execute the new task or queue it?   For now, let's just always try to
       queue it.  If the queue fills up, then we'll execute it.  */
#if OMP_41_ENABLED
    if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#else
    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
#endif
    {                                                           // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        if ( serialize_immediate )
            new_taskdata -> td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
    }
#endif

    return TASK_CURRENT_NOT_QUEUED;
}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001305
1306//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001307// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1308// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001309// loc_ref: location of original task pragma (ignored)
1310// gtid: Global Thread ID of encountering thread
1311// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1312// returns:
1313//
1314//  TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
1315//  TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1316
1317kmp_int32
1318__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1319{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001320 kmp_int32 res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001321
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001322#if KMP_DEBUG
1323 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1324#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001325 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1326 gtid, loc_ref, new_taskdata ) );
1327
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001328 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001329
1330 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1331 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001332 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001333}
1334
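// A minimal sketch of the calling sequence a compiler might emit for
// "#pragma omp task", showing how the entry points above fit together.  The
// __kmpc_omp_task_alloc() signature, the flag value used for a tied task and
// the outlined routine are assumptions for illustration, not taken from this file.
#if 0 // illustrative sketch, not compiled into the runtime
static kmp_int32
example_task_entry( kmp_int32 gtid, void *task )        // hypothetical outlined task body
{
    /* ... body of the task region, reading its data from the thunk ... */
    return 0;
}

static void
example_lower_omp_task( ident_t *loc )
{
    kmp_int32    gtid = __kmpc_global_thread_num( loc );    // encountering thread
    kmp_task_t * task = __kmpc_omp_task_alloc( loc, gtid, 1 /* assumed: tied */,
                                               sizeof(kmp_task_t), 0 /* no shareds */,
                                               (kmp_routine_entry_t) example_task_entry );
    /* ... copy firstprivate data into the allocated thunk here ... */
    __kmpc_omp_task( loc, gtid, task );      // defer if possible, else execute immediately
}
#endif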
Jim Cownie5e8470a2013-09-27 10:38:44 +00001335//-------------------------------------------------------------------------------------
1336// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1337
1338kmp_int32
1339__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1340{
1341 kmp_taskdata_t * taskdata;
1342 kmp_info_t * thread;
1343 int thread_finished = FALSE;
1344
Jonathan Peyton54127982015-11-04 21:37:48 +00001345 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001346
1347 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1348 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1349
1350 thread = __kmp_threads[ gtid ];
1351 taskdata = thread -> th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001352
1353#if OMPT_SUPPORT && OMPT_TRACE
1354 ompt_task_id_t my_task_id;
1355 ompt_parallel_id_t my_parallel_id;
1356
1357 if (ompt_enabled) {
1358 kmp_team_t *team = thread->th.th_team;
1359 my_task_id = taskdata->ompt_task_info.task_id;
1360 my_parallel_id = team->t.ompt_team_info.parallel_id;
1361
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001362 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001363 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1364 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1365 my_parallel_id, my_task_id);
1366 }
1367 }
1368#endif
1369
Jim Cownie5e8470a2013-09-27 10:38:44 +00001370#if USE_ITT_BUILD
1371 // Note: These values are used by ITT events as well.
1372#endif /* USE_ITT_BUILD */
1373 taskdata->td_taskwait_counter += 1;
1374 taskdata->td_taskwait_ident = loc_ref;
1375 taskdata->td_taskwait_thread = gtid + 1;
1376
1377#if USE_ITT_BUILD
1378 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1379 if ( itt_sync_obj != NULL )
1380 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1381#endif /* USE_ITT_BUILD */
1382
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001383#if OMP_41_ENABLED
1384 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1385#else
1386 if ( ! taskdata->td_flags.team_serial )
1387#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001388 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001389 // GEH: if team serialized, avoid reading the volatile variable below.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001390 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001391 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001392 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1393 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001394 }
1395 }
1396#if USE_ITT_BUILD
1397 if ( itt_sync_obj != NULL )
1398 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1399#endif /* USE_ITT_BUILD */
1400
1401 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1402 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001403
1404#if OMPT_SUPPORT && OMPT_TRACE
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001405 if (ompt_enabled) {
1406 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1407 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001408 my_parallel_id, my_task_id);
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001409 }
1410 taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001411 }
1412#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001413 }
1414
1415 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1416 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1417
1418 return TASK_CURRENT_NOT_QUEUED;
1419}
1420
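// A small user-level illustration of what the routine above implements: the
// encountering task blocks until its direct child tasks complete, and while it
// waits it helps execute queued tasks instead of idling.  The helpers and the
// pragma-to-call mapping below are assumptions for illustration only.
#if 0 // illustrative sketch, not compiled into the runtime
extern int  work_a( void );     // hypothetical helpers, not part of this file
extern int  work_b( void );
extern void use( int, int );

void example_taskwait( void )
{
    int a = 0, b = 0;
    #pragma omp task shared(a)  // child task 1
    a = work_a();
    #pragma omp task shared(b)  // child task 2
    b = work_b();
    #pragma omp taskwait        // assumed to lower to __kmpc_omp_taskwait();
                                // waits for the two children (not their descendants)
    use( a, b );                // safe: both children have completed here
}
#endif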
1421
1422//-------------------------------------------------
1423// __kmpc_omp_taskyield: switch to a different task
1424
1425kmp_int32
1426__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1427{
1428 kmp_taskdata_t * taskdata;
1429 kmp_info_t * thread;
1430 int thread_finished = FALSE;
1431
Jonathan Peyton45be4502015-08-11 21:36:41 +00001432 KMP_COUNT_BLOCK(OMP_TASKYIELD);
1433
Jim Cownie5e8470a2013-09-27 10:38:44 +00001434 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1435 gtid, loc_ref, end_part) );
1436
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001437 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001438 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1439
1440 thread = __kmp_threads[ gtid ];
1441 taskdata = thread -> th.th_current_task;
1442 // Should we model this as a task wait or not?
1443#if USE_ITT_BUILD
1444 // Note: These values are used by ITT events as well.
1445#endif /* USE_ITT_BUILD */
1446 taskdata->td_taskwait_counter += 1;
1447 taskdata->td_taskwait_ident = loc_ref;
1448 taskdata->td_taskwait_thread = gtid + 1;
1449
1450#if USE_ITT_BUILD
1451 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1452 if ( itt_sync_obj != NULL )
1453 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1454#endif /* USE_ITT_BUILD */
1455 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001456 kmp_task_team_t * task_team = thread->th.th_task_team;
1457 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001458 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001459 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1460 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1461 }
1462 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001463 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001464#if USE_ITT_BUILD
1465 if ( itt_sync_obj != NULL )
1466 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1467#endif /* USE_ITT_BUILD */
1468
1469 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1470 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1471 }
1472
1473 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1474 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1475
1476 return TASK_CURRENT_NOT_QUEUED;
1477}
1478
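// A small user-level illustration of the routine above: taskyield is only a
// scheduling hint, but when tasking is enabled the yielding thread may pick up
// queued tasks, which is what the __kmp_execute_tasks_32() call above does.
// The helpers and the pragma-to-call mapping are assumptions for illustration only.
#if 0 // illustrative sketch, not compiled into the runtime
extern void produce( void );    // hypothetical helpers, not part of this file
extern void consume( void );

void example_taskyield( void )
{
    volatile int ready = 0;
    #pragma omp task shared(ready)      // producer task
    { produce(); ready = 1; }
    #pragma omp task shared(ready)      // consumer polls and yields between checks
    {
        while ( ! ready ) {
            #pragma omp taskyield       // assumed to lower to __kmpc_omp_taskyield()
        }
        consume();
    }
    #pragma omp taskwait
}
#endif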
1479
1480#if OMP_40_ENABLED
1481//-------------------------------------------------------------------------------------
1482// __kmpc_taskgroup: Start a new taskgroup
1483
1484void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001485__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001486{
1487 kmp_info_t * thread = __kmp_threads[ gtid ];
1488 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1489 kmp_taskgroup_t * tg_new =
1490 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1491 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1492 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001493 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001494 tg_new->parent = taskdata->td_taskgroup;
1495 taskdata->td_taskgroup = tg_new;
1496}
1497
1498
1499//-------------------------------------------------------------------------------------
1500// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1501// and its descendants are complete
1502
1503void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001504__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001505{
1506 kmp_info_t * thread = __kmp_threads[ gtid ];
1507 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1508 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1509 int thread_finished = FALSE;
1510
1511 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1512 KMP_DEBUG_ASSERT( taskgroup != NULL );
1513
1514 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1515#if USE_ITT_BUILD
1516 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1517 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1518 if ( itt_sync_obj != NULL )
1519 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1520#endif /* USE_ITT_BUILD */
1521
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001522#if OMP_41_ENABLED
1523 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1524#else
1525 if ( ! taskdata->td_flags.team_serial )
1526#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001527 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001528 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001529 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001530 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1531 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001532 }
1533 }
1534
1535#if USE_ITT_BUILD
1536 if ( itt_sync_obj != NULL )
1537 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1538#endif /* USE_ITT_BUILD */
1539 }
1540 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1541
1542 // Restore parent taskgroup for the current task
1543 taskdata->td_taskgroup = taskgroup->parent;
1544 __kmp_thread_free( thread, taskgroup );
1545
1546 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1547}
1548#endif
1549
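// A minimal sketch of how the two taskgroup entry points above bracket a
// region.  Unlike taskwait, which waits only for direct children, the end of a
// taskgroup spins on taskgroup->count, which covers all descendant tasks
// created inside the region.  The helper and the pragma-to-call mapping below
// are assumptions for illustration only.
#if 0 // illustrative sketch, not compiled into the runtime
extern void do_work( void );    // hypothetical helper, not part of this file

void example_taskgroup( void )
{
    #pragma omp taskgroup       // assumed to lower to __kmpc_taskgroup(loc, gtid)
    {
        #pragma omp task
        {
            #pragma omp task    // nested descendant: still counted by the group
            do_work();
        }
    }                           // region end is assumed to lower to
                                // __kmpc_end_taskgroup(loc, gtid), which waits
                                // until taskgroup->count reaches zero
}
#endif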
1550
1551//------------------------------------------------------
1552// __kmp_remove_my_task: remove a task from my own deque
1553
1554static kmp_task_t *
1555__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1556 kmp_int32 is_constrained )
1557{
1558 kmp_task_t * task;
1559 kmp_taskdata_t * taskdata;
1560 kmp_thread_data_t *thread_data;
1561 kmp_uint32 tail;
1562
1563 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1564 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1565
1566 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1567
1568 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1569 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1570 thread_data->td.td_deque_tail) );
1571
1572 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1573 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1574 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1575 thread_data->td.td_deque_tail) );
1576 return NULL;
1577 }
1578
1579 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1580
1581 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1582 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1583 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1584 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1585 thread_data->td.td_deque_tail) );
1586 return NULL;
1587 }
1588
1589 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1590 taskdata = thread_data -> td.td_deque[ tail ];
1591
1592 if (is_constrained) {
1593        // we need to check if the candidate obeys the task scheduling constraint:
1594        // only a child of the current task can be scheduled
1595 kmp_taskdata_t * current = thread->th.th_current_task;
1596 kmp_int32 level = current->td_level;
1597 kmp_taskdata_t * parent = taskdata->td_parent;
1598 while ( parent != current && parent->td_level > level ) {
1599 parent = parent->td_parent; // check generation up to the level of the current task
1600 KMP_DEBUG_ASSERT(parent != NULL);
1601 }
1602 if ( parent != current ) {
1603            // If the tail task is not a child, then no other children can appear in the deque.
1604 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1605 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1606 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1607 thread_data->td.td_deque_tail) );
1608 return NULL;
1609 }
1610 }
1611
1612 thread_data -> td.td_deque_tail = tail;
1613 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1614
1615 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1616
1617 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1618 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1619 thread_data->td.td_deque_tail) );
1620
1621 task = KMP_TASKDATA_TO_TASK( taskdata );
1622 return task;
1623}
1624
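// The deque indexing above relies on TASK_DEQUE_SIZE being a power of two so
// that "& TASK_DEQUE_MASK" wraps indices cheaply.  A self-contained sketch of
// that arithmetic follows; the constants are assumptions, not the runtime's
// actual values.
#if 0 // illustrative sketch, not compiled into the runtime
enum { EXAMPLE_DEQUE_SIZE = 256, EXAMPLE_DEQUE_MASK = EXAMPLE_DEQUE_SIZE - 1 };

// Pop from the tail, the LIFO end used by the owning thread.
static kmp_uint32 example_pop_tail_index( kmp_uint32 tail )
{
    return ( tail - 1 ) & EXAMPLE_DEQUE_MASK;   // wraps 0 back to SIZE-1
}

// Push advances the tail; a steal consumes from the head, the FIFO end.
static kmp_uint32 example_advance_index( kmp_uint32 idx )
{
    return ( idx + 1 ) & EXAMPLE_DEQUE_MASK;    // wraps SIZE-1 back to 0
}
#endif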
1625
1626//-----------------------------------------------------------
1627// __kmp_steal_task: remove a task from another thread's deque
1628// Assume that calling thread has already checked existence of
1629// task_team thread_data before calling this routine.
1630
1631static kmp_task_t *
1632__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1633 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1634 kmp_int32 is_constrained )
1635{
1636 kmp_task_t * task;
1637 kmp_taskdata_t * taskdata;
1638 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001639 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001640
1641 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1642
1643 threads_data = task_team -> tt.tt_threads_data;
1644 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1645
1646 victim_tid = victim->th.th_info.ds.ds_tid;
1647 victim_td = & threads_data[ victim_tid ];
1648
1649 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1650 "head=%u tail=%u\n",
1651 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1652 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1653
1654 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1655 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1656 {
1657 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1658 "ntasks=%d head=%u tail=%u\n",
1659 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1660 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1661 return NULL;
1662 }
1663
1664 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1665
1666 // Check again after we acquire the lock
1667 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1668 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1669 {
1670 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1671 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1672 "ntasks=%d head=%u tail=%u\n",
1673 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1674 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1675 return NULL;
1676 }
1677
1678 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1679
1680 if ( !is_constrained ) {
1681 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1682 // Bump head pointer and Wrap.
1683 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1684 } else {
1685        // While we have postponed tasks, steal from the tail of the deque (smaller tasks)
1686 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1687 taskdata = victim_td -> td.td_deque[ tail ];
1688        // we need to check if the candidate obeys the task scheduling constraint:
1689        // only a child of the current task can be scheduled
1690 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1691 kmp_int32 level = current->td_level;
1692 kmp_taskdata_t * parent = taskdata->td_parent;
1693 while ( parent != current && parent->td_level > level ) {
1694 parent = parent->td_parent; // check generation up to the level of the current task
1695 KMP_DEBUG_ASSERT(parent != NULL);
1696 }
1697 if ( parent != current ) {
1698            // If the tail task is not a child, then no other children can appear in the deque (?).
1699 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1700 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1701 "ntasks=%d head=%u tail=%u\n",
1702 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1703 task_team, victim_td->td.td_deque_ntasks,
1704 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1705 return NULL;
1706 }
1707 victim_td -> td.td_deque_tail = tail;
1708 }
1709 if (*thread_finished) {
1710 // We need to un-mark this victim as a finished victim. This must be done before
1711 // releasing the lock, or else other threads (starting with the master victim)
1712 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001713 kmp_uint32 count;
1714
1715 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001716
1717 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1718 gtid, count + 1, task_team) );
1719
1720 *thread_finished = FALSE;
1721 }
1722 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1723
1724 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1725
Jonathan Peyton45be4502015-08-11 21:36:41 +00001726 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001727 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001728 "ntasks=%d head=%u tail=%u\n",
1729 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1730 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1731 victim_td->td.td_deque_tail) );
1732
1733 task = KMP_TASKDATA_TO_TASK( taskdata );
1734 return task;
1735}
1736
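// Both __kmp_remove_my_task() and __kmp_steal_task() apply the same check when
// is_constrained is set: a candidate may only be scheduled if the currently
// executing task is among its ancestors.  A minimal sketch of that ancestry
// walk, using only the td_parent / td_level fields referenced above (the
// helper name is hypothetical):
#if 0 // illustrative sketch, not compiled into the runtime
static int
example_obeys_constraint( kmp_taskdata_t *candidate, kmp_taskdata_t *current )
{
    kmp_int32        level  = current->td_level;
    kmp_taskdata_t * parent = candidate->td_parent;
    // Walk up until we either meet the current task or rise above its nesting
    // level, in which case the current task cannot be an ancestor.
    while ( parent != current && parent->td_level > level ) {
        parent = parent->td_parent;
    }
    return ( parent == current );
}
#endif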
1737
1738//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001739// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001740// is satisfied (return true) or there are none left (return false).
1741// final_spin is TRUE if this is the spin at the release barrier.
1742// thread_finished indicates whether the thread is finished executing all
1743// the tasks it has on its deque, and is at the release barrier.
1744// flag holds the location on which to spin and the value used to terminate the spin.
1745// flag == NULL means only execute a single task and return.
1746// The flag type (32-bit, 64-bit, oncore) selects which wrapper below is instantiated.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001747template <class C>
1748static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1749 int *thread_finished
1750 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751{
1752 kmp_task_team_t * task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001753 kmp_thread_data_t * threads_data;
1754 kmp_task_t * task;
1755 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1756 volatile kmp_uint32 * unfinished_threads;
1757 kmp_int32 nthreads, last_stolen, k, tid;
1758
1759 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1760 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1761
1762 task_team = thread -> th.th_task_team;
Jonathan Peyton54127982015-11-04 21:37:48 +00001763 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001764
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001765 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001766 gtid, final_spin, *thread_finished) );
1767
1768 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1769 KMP_DEBUG_ASSERT( threads_data != NULL );
1770
1771 nthreads = task_team -> tt.tt_nproc;
1772 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001773#if OMP_41_ENABLED
1774 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1775#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001776 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001777#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001778 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1779
1780 // Choose tasks from our own work queue.
1781 start:
1782 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1783#if USE_ITT_BUILD && USE_ITT_NOTIFY
1784 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1785 if ( itt_sync_obj == NULL ) {
1786 // we are at fork barrier where we could not get the object reliably
1787 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1788 }
1789 __kmp_itt_task_starting( itt_sync_obj );
1790 }
1791#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1792 __kmp_invoke_task( gtid, task, current_task );
1793#if USE_ITT_BUILD
1794 if ( itt_sync_obj != NULL )
1795 __kmp_itt_task_finished( itt_sync_obj );
1796#endif /* USE_ITT_BUILD */
1797
1798 // If this thread is only partway through the barrier and the condition
1799 // is met, then return now, so that the barrier gather/release pattern can proceed.
1800 // If this thread is in the last spin loop in the barrier, waiting to be
1801        // released, we know that the termination condition will not be satisfied,
1802 // so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001803 if (flag == NULL || (!final_spin && flag->done_check())) {
1804 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805 return TRUE;
1806 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001807 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001808 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1809 }
1810
1811 // This thread's work queue is empty. If we are in the final spin loop
1812 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001813#if OMP_41_ENABLED
1814 // The work queue may be empty but there might be proxy tasks still executing
1815 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1816#else
1817 if (final_spin)
1818#endif
1819 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001820 // First, decrement the #unfinished threads, if that has not already
1821 // been done. This decrement might be to the spin location, and
1822 // result in the termination condition being satisfied.
1823 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001824 kmp_uint32 count;
1825
1826 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001827 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828 gtid, count, task_team) );
1829 *thread_finished = TRUE;
1830 }
1831
1832 // It is now unsafe to reference thread->th.th_team !!!
1833 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1834 // thread to pass through the barrier, where it might reset each thread's
1835 // th.th_team field for the next parallel region.
1836 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001837 if (flag != NULL && flag->done_check()) {
1838 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001839 return TRUE;
1840 }
1841 }
1842
Jonathan Peyton54127982015-11-04 21:37:48 +00001843 if (thread->th.th_task_team == NULL) return FALSE;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001844#if OMP_41_ENABLED
1845 // check if there are other threads to steal from, otherwise go back
1846 if ( nthreads == 1 )
1847 goto start;
1848#endif
1849
Jim Cownie5e8470a2013-09-27 10:38:44 +00001850 // Try to steal from the last place I stole from successfully.
1851 tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1852 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1853
1854 if (last_stolen != -1) {
1855 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1856
1857 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1858 thread_finished, is_constrained )) != NULL)
1859 {
1860#if USE_ITT_BUILD && USE_ITT_NOTIFY
1861 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1862 if ( itt_sync_obj == NULL ) {
1863 // we are at fork barrier where we could not get the object reliably
1864 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1865 }
1866 __kmp_itt_task_starting( itt_sync_obj );
1867 }
1868#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1869 __kmp_invoke_task( gtid, task, current_task );
1870#if USE_ITT_BUILD
1871 if ( itt_sync_obj != NULL )
1872 __kmp_itt_task_finished( itt_sync_obj );
1873#endif /* USE_ITT_BUILD */
1874
1875 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001876 if (flag == NULL || (!final_spin && flag->done_check())) {
1877 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001878 gtid) );
1879 return TRUE;
1880 }
1881
Jonathan Peyton54127982015-11-04 21:37:48 +00001882 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001883 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1884 // If the execution of the stolen task resulted in more tasks being
1885 // placed on our run queue, then restart the whole process.
1886 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001887 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001888 gtid) );
1889 goto start;
1890 }
1891 }
1892
1893 // Don't give priority to stealing from this thread anymore.
1894 threads_data[ tid ].td.td_deque_last_stolen = -1;
1895
1896        // The victim's work queue is empty.  If we are in the final spin loop
1897 // of the barrier, check and see if the termination condition is satisfied.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001898#if OMP_41_ENABLED
1899 // The work queue may be empty but there might be proxy tasks still executing
1900 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1901#else
1902 if (final_spin)
1903#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001904 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001905 // First, decrement the #unfinished threads, if that has not already
1906 // been done. This decrement might be to the spin location, and
1907 // result in the termination condition being satisfied.
1908 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00001909 kmp_uint32 count;
1910
1911 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001912 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001913 "task_team=%p\n", gtid, count, task_team) );
1914 *thread_finished = TRUE;
1915 }
1916
1917 // If __kmp_tasking_mode != tskm_immediate_exec
1918 // then it is now unsafe to reference thread->th.th_team !!!
1919 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1920 // thread to pass through the barrier, where it might reset each thread's
1921 // th.th_team field for the next parallel region.
1922 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001923 if (flag != NULL && flag->done_check()) {
1924 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925 gtid) );
1926 return TRUE;
1927 }
1928 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001929 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001930 }
1931
1932 // Find a different thread to steal work from. Pick a random thread.
1933 // My initial plan was to cycle through all the threads, and only return
1934 // if we tried to steal from every thread, and failed. Arch says that's
1935 // not such a great idea.
1936 // GEH - need yield code in this loop for throughput library mode?
1937 new_victim:
1938 k = __kmp_get_random( thread ) % (nthreads - 1);
1939 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1940 ++k; // Adjusts random distribution to exclude self
1941 }
1942 {
1943 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1944 int first;
1945
1946 // There is a slight chance that __kmp_enable_tasking() did not wake up
1947 // all threads waiting at the barrier. If this thread is sleeping, then
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001948 // wake it up. Since we were going to pay the cache miss penalty
1949 // for referencing another thread's kmp_info_t struct anyway, the check
Jim Cownie5e8470a2013-09-27 10:38:44 +00001950 // shouldn't cost too much performance at this point.
1951 // In extra barrier mode, tasks do not sleep at the separate tasking
1952 // barrier, so this isn't a problem.
1953 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1954 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1955 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1956 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001957 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958            // A sleeping thread should not have any tasks on its queue.
Alp Toker8f2d3f02014-02-24 10:40:15 +00001959 // There is a slight possibility that it resumes, steals a task from
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001960 // another thread, which spawns more tasks, all in the time that it takes
Jim Cownie5e8470a2013-09-27 10:38:44 +00001961 // this thread to check => don't write an assertion that the victim's
1962 // queue is empty. Try stealing from a different thread.
1963 goto new_victim;
1964 }
1965
1966 // Now try to steal work from the selected thread
1967 first = TRUE;
1968 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1969 thread_finished, is_constrained )) != NULL)
1970 {
1971#if USE_ITT_BUILD && USE_ITT_NOTIFY
1972 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1973 if ( itt_sync_obj == NULL ) {
1974 // we are at fork barrier where we could not get the object reliably
1975 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1976 }
1977 __kmp_itt_task_starting( itt_sync_obj );
1978 }
1979#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1980 __kmp_invoke_task( gtid, task, current_task );
1981#if USE_ITT_BUILD
1982 if ( itt_sync_obj != NULL )
1983 __kmp_itt_task_finished( itt_sync_obj );
1984#endif /* USE_ITT_BUILD */
1985
1986 // Try stealing from this victim again, in the future.
1987 if (first) {
1988 threads_data[ tid ].td.td_deque_last_stolen = k;
1989 first = FALSE;
1990 }
1991
1992 // Check to see if this thread can proceed.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993 if (flag == NULL || (!final_spin && flag->done_check())) {
1994 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995 gtid) );
1996 return TRUE;
1997 }
Jonathan Peyton54127982015-11-04 21:37:48 +00001998 if (thread->th.th_task_team == NULL) break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001999 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
2000
2001 // If the execution of the stolen task resulted in more tasks being
2002 // placed on our run queue, then restart the whole process.
2003 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002004 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005 gtid) );
2006 goto start;
2007 }
2008 }
2009
2010            // The victim's work queue is empty.  If we are in the final spin loop
2011 // of the barrier, check and see if the termination condition is satisfied.
2012 // Going on and finding a new victim to steal from is expensive, as it
2013 // involves a lot of cache misses, so we definitely want to re-check the
2014 // termination condition before doing that.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002015#if OMP_41_ENABLED
2016 // The work queue may be empty but there might be proxy tasks still executing
2017 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
2018#else
2019 if (final_spin)
2020#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002021 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022 // First, decrement the #unfinished threads, if that has not already
2023 // been done. This decrement might be to the spin location, and
2024 // result in the termination condition being satisfied.
2025 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002026 kmp_uint32 count;
2027
2028 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002029 KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030 "task_team=%p\n",
2031 gtid, count, task_team) );
2032 *thread_finished = TRUE;
2033 }
2034
2035 // If __kmp_tasking_mode != tskm_immediate_exec,
2036 // then it is now unsafe to reference thread->th.th_team !!!
2037 // Decrementing task_team->tt.tt_unfinished_threads can allow the master
2038 // thread to pass through the barrier, where it might reset each thread's
2039 // th.th_team field for the next parallel region.
2040 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002041 if (flag != NULL && flag->done_check()) {
2042 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002043 return TRUE;
2044 }
2045 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002046 if (thread->th.th_task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002047 }
2048
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002049 KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050 return FALSE;
2051}
2052
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002053int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
2054 int *thread_finished
2055 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2056{
2057 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2058 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2059}
2060
2061int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2062 int *thread_finished
2063 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2064{
2065 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2066 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2067}
2068
2069int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2070 int *thread_finished
2071 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2072{
2073 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2074 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2075}
2076
2077
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078
2079//-----------------------------------------------------------------------------
2080// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2081// next barrier so they can assist in executing enqueued tasks.
2082// First thread in allocates the task team atomically.
2083
2084static void
2085__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2086{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002087 kmp_thread_data_t *threads_data;
2088 int nthreads, i, is_init_thread;
2089
2090 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2091 __kmp_gtid_from_thread( this_thr ) ) );
2092
2093 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002094 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002095
2096 nthreads = task_team->tt.tt_nproc;
2097 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002098 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002099
2100 // Allocate or increase the size of threads_data if necessary
2101 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2102
2103 if (!is_init_thread) {
2104 // Some other thread already set up the array.
2105 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2106 __kmp_gtid_from_thread( this_thr ) ) );
2107 return;
2108 }
2109 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2110 KMP_DEBUG_ASSERT( threads_data != NULL );
2111
2112 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2113 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2114 {
2115 // Release any threads sleeping at the barrier, so that they can steal
2116 // tasks and execute them. In extra barrier mode, tasks do not sleep
2117 // at the separate tasking barrier, so this isn't a problem.
2118 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002119 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120 kmp_info_t *thread = threads_data[i].td.td_thr;
2121
2122 if (i == this_thr->th.th_info.ds.ds_tid) {
2123 continue;
2124 }
2125 // Since we haven't locked the thread's suspend mutex lock at this
2126 // point, there is a small window where a thread might be putting
2127 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002128 // To work around this, __kmp_execute_tasks_template() periodically checks
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129            // to see if other threads are sleeping (using the same random
2130 // mechanism that is used for task stealing) and awakens them if
2131 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002132 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133 {
2134 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2135 __kmp_gtid_from_thread( this_thr ),
2136 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002137 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002138 }
2139 else {
2140 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2141 __kmp_gtid_from_thread( this_thr ),
2142 __kmp_gtid_from_thread( thread ) ) );
2143 }
2144 }
2145 }
2146
2147 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2148 __kmp_gtid_from_thread( this_thr ) ) );
2149}
2150
2151
2152/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002153/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002154 * Utility routines for "task teams".  A task team (kmp_task_team_t) is kind of
2155 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2156 * After a child thread checks into a barrier and calls __kmp_release() from
2157 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2158 * longer assume that the kmp_team_t structure is intact (at any moment, the
2159 * master thread may exit the barrier code and free the team data structure,
2160 * and return the threads to the thread pool).
2161 *
2162 * This does not work with the tasking code, as the thread is still
2163 * expected to participate in the execution of any tasks that may have been
2164 * spawned by a member of the team, and the thread still needs access to
2165 * each thread in the team, so that it can steal work from it.
2166 *
2167 * Enter the existence of the kmp_task_team_t struct. It employs a reference
2168 * counting mechanism, and is allocated by the master thread before calling
2169 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2170 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2171 * of the kmp_task_team_t structs for consecutive barriers can overlap
2172 * (and will, unless the master thread is the last thread to exit the barrier
2173 * release phase, which is not typical).
2174 *
2175 * The existence of such a struct is useful outside the context of tasking,
2176 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2177 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2178 * libraries.
2179 *
2180 * We currently use the existence of the threads array as an indicator that
2181 * tasks were spawned since the last barrier. If the structure is to be
2182 * useful outside the context of tasking, then this will have to change, but
2183 * not setting the field minimizes the performance impact of tasking on
2184 * barriers, when no explicit tasks were spawned (pushed, actually).
2185 */
2186
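// The recycling scheme described above reduces to a singly linked free list
// protected by a bootstrap lock: __kmp_allocate_task_team() pops from it and
// __kmp_free_task_team() pushes onto it (both below).  A stripped-down sketch
// of that pattern, with hypothetical names:
#if 0 // illustrative sketch, not compiled into the runtime
typedef struct example_node { struct example_node *next; } example_node_t;
static example_node_t       *example_free_list = NULL;
static kmp_bootstrap_lock_t  example_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( example_lock );

static example_node_t *example_pop( void )
{
    example_node_t *n;
    __kmp_acquire_bootstrap_lock( &example_lock );
    n = example_free_list;
    if ( n != NULL )
        example_free_list = n->next;
    __kmp_release_bootstrap_lock( &example_lock );
    return n;               // caller allocates a fresh node when this is NULL
}

static void example_push( example_node_t *n )
{
    __kmp_acquire_bootstrap_lock( &example_lock );
    n->next = example_free_list;
    example_free_list = n;
    __kmp_release_bootstrap_lock( &example_lock );
}
#endif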
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002187
Jim Cownie5e8470a2013-09-27 10:38:44 +00002188static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2189// Lock for task team data structures
2190static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2191
2192
2193//------------------------------------------------------------------------------
2194// __kmp_alloc_task_deque:
2195//  Allocates a task deque for a particular thread, and initializes the necessary
2196// data structures relating to the deque. This only happens once per thread
2197// per task team since task teams are recycled.
2198// No lock is needed during allocation since each thread allocates its own
2199// deque.
2200
2201static void
2202__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2203{
2204 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2205 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2206
2207 // Initialize last stolen task field to "none"
2208 thread_data -> td.td_deque_last_stolen = -1;
2209
2210 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2211 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2212 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2213
2214 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2215 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2216 // Allocate space for task deque, and zero the deque
2217 // Cannot use __kmp_thread_calloc() because threads not around for
2218 // kmp_reap_task_team( ).
2219 thread_data -> td.td_deque = (kmp_taskdata_t **)
2220 __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2221}
2222
2223
2224//------------------------------------------------------------------------------
2225// __kmp_free_task_deque:
2226// Deallocates a task deque for a particular thread.
2227// Happens at library deallocation so don't need to reset all thread data fields.
2228
2229static void
2230__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2231{
2232 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2233
2234 if ( thread_data -> td.td_deque != NULL ) {
2235 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2236 __kmp_free( thread_data -> td.td_deque );
2237 thread_data -> td.td_deque = NULL;
2238 }
2239 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2240
2241#ifdef BUILD_TIED_TASK_STACK
2242 // GEH: Figure out what to do here for td_susp_tied_tasks
2243 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2244 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2245 }
2246#endif // BUILD_TIED_TASK_STACK
2247}
2248
2249
2250//------------------------------------------------------------------------------
2251// __kmp_realloc_task_threads_data:
2252// Allocates a threads_data array for a task team, either by allocating an initial
2253// array or enlarging an existing array. Only the first thread to get the lock
2254//  allocs or enlarges the array and re-initializes the array elements.
2255// That thread returns "TRUE", the rest return "FALSE".
2256// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2257// The current size is given by task_team -> tt.tt_max_threads.
2258
2259static int
2260__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2261{
2262 kmp_thread_data_t ** threads_data_p;
2263 kmp_int32 nthreads, maxthreads;
2264 int is_init_thread = FALSE;
2265
2266 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2267 // Already reallocated and initialized.
2268 return FALSE;
2269 }
2270
2271 threads_data_p = & task_team -> tt.tt_threads_data;
2272 nthreads = task_team -> tt.tt_nproc;
2273 maxthreads = task_team -> tt.tt_max_threads;
2274
2275 // All threads must lock when they encounter the first task of the implicit task
2276 // region to make sure threads_data fields are (re)initialized before used.
2277 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2278
2279 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2280 // first thread to enable tasking
2281 kmp_team_t *team = thread -> th.th_team;
2282 int i;
2283
2284 is_init_thread = TRUE;
2285 if ( maxthreads < nthreads ) {
2286
2287 if ( *threads_data_p != NULL ) {
2288 kmp_thread_data_t *old_data = *threads_data_p;
2289 kmp_thread_data_t *new_data = NULL;
2290
2291 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2292 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2293 __kmp_gtid_from_thread( thread ), task_team,
2294 nthreads, maxthreads ) );
2295 // Reallocate threads_data to have more elements than current array
2296 // Cannot use __kmp_thread_realloc() because threads not around for
2297 // kmp_reap_task_team( ). Note all new array entries are initialized
2298 // to zero by __kmp_allocate().
2299 new_data = (kmp_thread_data_t *)
2300 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2301 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002302 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002303 (void *) old_data,
2304                               maxthreads * sizeof(kmp_thread_data_t) ); // copy whole thread_data entries
Jim Cownie5e8470a2013-09-27 10:38:44 +00002305
2306#ifdef BUILD_TIED_TASK_STACK
2307 // GEH: Figure out if this is the right thing to do
2308 for (i = maxthreads; i < nthreads; i++) {
2309 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2310 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2311 }
2312#endif // BUILD_TIED_TASK_STACK
2313 // Install the new data and free the old data
2314 (*threads_data_p) = new_data;
2315 __kmp_free( old_data );
2316 }
2317 else {
2318 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2319 "threads data for task_team %p, size = %d\n",
2320 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2321 // Make the initial allocate for threads_data array, and zero entries
2322 // Cannot use __kmp_thread_calloc() because threads not around for
2323 // kmp_reap_task_team( ).
2324 *threads_data_p = (kmp_thread_data_t *)
2325 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2326#ifdef BUILD_TIED_TASK_STACK
2327 // GEH: Figure out if this is the right thing to do
2328 for (i = 0; i < nthreads; i++) {
2329 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2330 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2331 }
2332#endif // BUILD_TIED_TASK_STACK
2333 }
2334 task_team -> tt.tt_max_threads = nthreads;
2335 }
2336 else {
2337 // If array has (more than) enough elements, go ahead and use it
2338 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2339 }
2340
2341 // initialize threads_data pointers back to thread_info structures
2342 for (i = 0; i < nthreads; i++) {
2343 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2344 thread_data -> td.td_thr = team -> t.t_threads[i];
2345
2346 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2347 // The last stolen field survives across teams / barrier, and the number
2348 // of threads may have changed. It's possible (likely?) that a new
2349 // parallel region will exhibit the same behavior as the previous region.
2350 thread_data -> td.td_deque_last_stolen = -1;
2351 }
2352 }
2353
2354 KMP_MB();
2355 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2356 }
2357
2358 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2359 return is_init_thread;
2360}
2361
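// __kmp_realloc_task_threads_data() above uses a check / lock / re-check
// shape: the unlocked read of tt_found_tasks filters out the common case, only
// the first thread through the lock performs the (re)allocation, and the
// result is published with a memory fence followed by TCW_SYNC_4.  A generic
// sketch of that pattern, with hypothetical names:
#if 0 // illustrative sketch, not compiled into the runtime
static volatile kmp_int32    example_initialized = FALSE;
static kmp_bootstrap_lock_t  example_init_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( example_init_lock );

static int example_initialize_once( void )
{
    int did_init = FALSE;
    if ( TCR_4( example_initialized ) )
        return FALSE;                       // fast path: already set up
    __kmp_acquire_bootstrap_lock( &example_init_lock );
    if ( ! TCR_4( example_initialized ) ) {
        /* ... allocate and initialize the shared data here ... */
        KMP_MB();                           // make the data visible first
        TCW_SYNC_4( example_initialized, TRUE );
        did_init = TRUE;
    }
    __kmp_release_bootstrap_lock( &example_init_lock );
    return did_init;
}
#endif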
2362
2363//------------------------------------------------------------------------------
2364// __kmp_free_task_threads_data:
2365// Deallocates a threads_data array for a task team, including any attached
2366// tasking deques. Only occurs at library shutdown.
2367
2368static void
2369__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2370{
2371 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2372 if ( task_team -> tt.tt_threads_data != NULL ) {
2373 int i;
2374 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2375 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2376 }
2377 __kmp_free( task_team -> tt.tt_threads_data );
2378 task_team -> tt.tt_threads_data = NULL;
2379 }
2380 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2381}
2382
2383
2384//------------------------------------------------------------------------------
2385// __kmp_allocate_task_team:
2386// Allocates a task team associated with a specific team, taking it from
2387// the global task team free list if possible. Also initializes data structures.
2388
2389static kmp_task_team_t *
2390__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2391{
2392 kmp_task_team_t *task_team = NULL;
2393 int nthreads;
2394
2395 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2396 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2397
2398 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2399 // Take a task team from the task team pool
2400 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2401 if (__kmp_free_task_teams != NULL) {
2402 task_team = __kmp_free_task_teams;
2403 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2404 task_team -> tt.tt_next = NULL;
2405 }
2406 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2407 }
2408
2409 if (task_team == NULL) {
2410 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2411 "task team for team %p\n",
2412 __kmp_gtid_from_thread( thread ), team ) );
2413 // Allocate a new task team if one is not available.
2414 // Cannot use __kmp_thread_malloc() because threads not around for
2415 // kmp_reap_task_team( ).
2416 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2417 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2418 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2419 //task_team -> tt.tt_max_threads = 0;
2420 //task_team -> tt.tt_next = NULL;
2421 }
2422
2423 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002424#if OMP_41_ENABLED
2425 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2426#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002427 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2428
Jim Cownie5e8470a2013-09-27 10:38:44 +00002429 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2430 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002431
Jonathan Peyton54127982015-11-04 21:37:48 +00002432 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2433 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002434 return task_team;
2435}
2436
2437
2438//------------------------------------------------------------------------------
2439// __kmp_free_task_team:
2440// Frees the task team associated with a specific thread, and adds it
2441// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002442
Jonathan Peyton54127982015-11-04 21:37:48 +00002443void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002444__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2445{
2446 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2447 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2448
Jim Cownie5e8470a2013-09-27 10:38:44 +00002449 // Put task team back on free list
2450 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2451
2452 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2453 task_team -> tt.tt_next = __kmp_free_task_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002454 TCW_PTR(__kmp_free_task_teams, task_team);
2455
2456 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2457}
2458
2459
2460//------------------------------------------------------------------------------
2461// __kmp_reap_task_teams:
2462// Free all the task teams on the task team free list.
2463// Should only be done during library shutdown.
2464// Cannot do anything that needs a thread structure or gtid since they are already gone.
2465
2466void
2467__kmp_reap_task_teams( void )
2468{
2469 kmp_task_team_t *task_team;
2470
2471 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2472 // Free all task_teams on the free list
2473 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2474 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2475 __kmp_free_task_teams = task_team -> tt.tt_next;
2476 task_team -> tt.tt_next = NULL;
2477
2478 // Free threads_data if necessary
2479 if ( task_team -> tt.tt_threads_data != NULL ) {
2480 __kmp_free_task_threads_data( task_team );
2481 }
2482 __kmp_free( task_team );
2483 }
2484 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2485 }
2486}
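/*
 * Illustrative sketch of the pattern used by __kmp_allocate_task_team, __kmp_free_task_team
 * and __kmp_reap_task_teams above: a lock-protected LIFO free list of recycled structures.
 * This is a minimal stand-alone model, guarded out so it is never compiled; the toy_* names
 * are hypothetical and a pthread mutex stands in for the bootstrap lock.
 */
#if 0
#include <pthread.h>
#include <stdlib.h>

typedef struct toy_node { struct toy_node *next; } toy_node_t;

static toy_node_t     *toy_free_list = NULL;
static pthread_mutex_t toy_free_lock = PTHREAD_MUTEX_INITIALIZER;

static toy_node_t *toy_alloc(void)          // cf. __kmp_allocate_task_team
{
    toy_node_t *n = NULL;
    pthread_mutex_lock(&toy_free_lock);
    if (toy_free_list != NULL) {            // reuse a recycled structure if one exists
        n = toy_free_list;
        toy_free_list = n->next;
        n->next = NULL;
    }
    pthread_mutex_unlock(&toy_free_lock);
    if (n == NULL)                          // otherwise allocate (and zero) a fresh one
        n = (toy_node_t *)calloc(1, sizeof(*n));
    return n;
}

static void toy_release(toy_node_t *n)      // cf. __kmp_free_task_team: push back on the list
{
    pthread_mutex_lock(&toy_free_lock);
    n->next = toy_free_list;
    toy_free_list = n;
    pthread_mutex_unlock(&toy_free_lock);
}

static void toy_reap(void)                  // cf. __kmp_reap_task_teams: shutdown only
{
    pthread_mutex_lock(&toy_free_lock);
    while (toy_free_list != NULL) {
        toy_node_t *n = toy_free_list;
        toy_free_list = n->next;
        free(n);
    }
    pthread_mutex_unlock(&toy_free_lock);
}
#endif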
2487
Jim Cownie5e8470a2013-09-27 10:38:44 +00002488//------------------------------------------------------------------------------
2489// __kmp_wait_to_unref_task_teams:
2490// Some threads could still be in the fork barrier release code, possibly
2491// trying to steal tasks. Wait for each thread to unreference its task team.
2492//
2493void
2494__kmp_wait_to_unref_task_teams(void)
2495{
2496 kmp_info_t *thread;
2497 kmp_uint32 spins;
2498 int done;
2499
2500 KMP_INIT_YIELD( spins );
2501
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502 for (;;) {
2503 done = TRUE;
2504
2505        // TODO: GEH - this may be wrong because some synchronization would be necessary
2506        // in case threads are added to the pool during the traversal.
2507        // Need to verify that the lock for the thread pool is held when calling
2508        // this routine.
2509 for (thread = (kmp_info_t *)__kmp_thread_pool;
2510 thread != NULL;
2511 thread = thread->th.th_next_pool)
2512 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002513#if KMP_OS_WINDOWS
2514 DWORD exit_val;
2515#endif
2516 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2517 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2518 __kmp_gtid_from_thread( thread ) ) );
2519 continue;
2520 }
2521#if KMP_OS_WINDOWS
2522 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2523 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002524 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002525 continue;
2526 }
2527#endif
2528
2529 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2530
2531 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2532 __kmp_gtid_from_thread( thread ) ) );
2533
2534 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002535 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002536 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002537 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2539 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002540 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002541 }
2542 }
2543 }
2544 if (done) {
2545 break;
2546 }
2547
2548 // If we are oversubscribed,
2549 // or have waited a bit (and library mode is throughput), yield.
2550 // Pause is in the following code.
2551 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2552 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2553 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002554}
2555
2556
2557//------------------------------------------------------------------------------
2558// __kmp_task_team_setup: Create a task_team for the current team, but use
2559// an already created, unused one if it already exists.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002560void
Jonathan Peyton54127982015-11-04 21:37:48 +00002561__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002562{
2563 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2564
Jonathan Peyton54127982015-11-04 21:37:48 +00002565 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2566 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
2567 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002568 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002569 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002570 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002571 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002572 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002573
2574 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
2575 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2576 // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
2577 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
2578 // serialized teams.
Jonathan Peytone1dad192015-11-30 20:05:13 +00002579 if (team->t.t_nproc > 1) {
2580 int other_team = 1 - this_thr->th.th_task_state;
2581 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2582 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2583 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2584 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2585 ((team != NULL) ? team->t.t_id : -1), other_team ));
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002586 }
Jonathan Peytone1dad192015-11-30 20:05:13 +00002587 else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2588 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2589 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2590 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2591 TCW_4(task_team->tt.tt_found_tasks, FALSE);
2592#if OMP_41_ENABLED
2593 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2594#endif
2595 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2596 TCW_4(task_team->tt.tt_active, TRUE );
2597 }
2598 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2599 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2600 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2601 ((team != NULL) ? team->t.t_id : -1), other_team ));
2602 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002603 }
2604}
2605
2606
2607//------------------------------------------------------------------------------
2608// __kmp_task_team_sync: Propagation of task team data from team to threads
2609// which happens just after the release phase of a team barrier. This may be
2610// called by any thread, but only for teams with # threads > 1.
2611
2612void
2613__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2614{
2615 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2616
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002617 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002618 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002619 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2620 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jonathan Peyton54127982015-11-04 21:37:48 +00002621 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002622 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2623 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624}
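/*
 * Illustrative sketch of the double-buffered task_team scheme implemented by
 * __kmp_task_team_setup and __kmp_task_team_sync above: the team keeps two slots,
 * each thread keeps a parity bit, and crossing a barrier flips the parity so threads
 * move to the slot the master prepared while they were still draining the old one.
 * Guarded out, never compiled; the toy_* names are hypothetical.
 */
#if 0
#include <stddef.h>

typedef struct toy_task_team { int active; int nproc; } toy_task_team_t;
typedef struct toy_team     { toy_task_team_t *slots[2]; int nproc; } toy_team_t;
typedef struct toy_thread   { int parity; toy_task_team_t *task_team; } toy_thread_t;

/* Master prepares the slot that will be used after the *next* barrier; the slot at the
   current parity is left alone because workers may still be taking tasks from it. */
static void toy_setup(toy_thread_t *master, toy_team_t *team, toy_task_team_t storage[2])
{
    int other = 1 - master->parity;
    if (team->slots[other] == NULL)
        team->slots[other] = &storage[other];   // "allocate" the second slot on demand
    team->slots[other]->active = 1;             // (re)activate it for the upcoming region
    team->slots[other]->nproc  = team->nproc;   // adjust if the team size changed
}

/* Every thread runs this just after barrier release: flip parity, then cache the pointer. */
static void toy_sync(toy_thread_t *thr, const toy_team_t *team)
{
    thr->parity    = 1 - thr->parity;
    thr->task_team = team->slots[thr->parity];
}
#endif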
2625
2626
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002627//--------------------------------------------------------------------------------------------
2628// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
Jonathan Peyton54127982015-11-04 21:37:48 +00002629// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2630// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2631// optionally as the last argument. When wait is zero, master thread does not wait for
2632// unfinished_threads to reach 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002633void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002634__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002635 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jonathan Peyton54127982015-11-04 21:37:48 +00002636 , int wait)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002637{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002638 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002639
2640 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2641 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2642
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002643 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002644 if (wait) {
2645 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2646 __kmp_gtid_from_thread(this_thr), task_team));
2647 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2648 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2649 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2650 flag.wait(this_thr, TRUE
2651 USE_ITT_BUILD_ARG(itt_sync_obj));
2652 }
2653 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2654 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2655 "setting active to false, setting local and team's pointer to NULL\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002656 __kmp_gtid_from_thread(this_thr), task_team));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002657#if OMP_41_ENABLED
2658 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2659 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2660#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002661 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002662#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002663 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2664 KMP_MB();
2665
2666 TCW_PTR(this_thr->th.th_task_team, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667 }
2668}
2669
2670
2671//------------------------------------------------------------------------------
2672// __kmp_tasking_barrier:
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002673// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002674// Internal function to execute all tasks prior to a regular barrier or a
2675// join barrier. It is a full barrier itself, which unfortunately turns
2676// regular barriers into double barriers and join barriers into 1 1/2
2677// barriers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002678void
2679__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2680{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002681 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002682 int flag = FALSE;
2683 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2684
2685#if USE_ITT_BUILD
2686 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2687#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002688 kmp_flag_32 spin_flag(spin, 0U);
2689 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2690 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691#if USE_ITT_BUILD
2692 // TODO: What about itt_sync_obj??
2693 KMP_FSYNC_SPIN_PREPARE( spin );
2694#endif /* USE_ITT_BUILD */
2695
2696 if( TCR_4(__kmp_global.g.g_done) ) {
2697 if( __kmp_global.g.g_abort )
2698 __kmp_abort_thread( );
2699 break;
2700 }
2701 KMP_YIELD( TRUE ); // GH: We always yield here
2702 }
2703#if USE_ITT_BUILD
2704 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2705#endif /* USE_ITT_BUILD */
2706}
2707
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002708
2709#if OMP_41_ENABLED
2710
2711/* __kmp_give_task puts a task into a given thread's queue if:
Jonathan Peytonff684e42016-02-11 22:58:29 +00002712 - the queue for that thread was created
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002713 - there's space in that queue
2714
2715 Because of this, __kmp_push_task needs to re-check whether there is space after acquiring the lock
2716 */
2717static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2718{
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002719 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002720 kmp_task_team_t * task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002721
2722 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2723
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002724    // If task_team is NULL, something has gone badly wrong...
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002725 KMP_DEBUG_ASSERT( task_team != NULL );
2726
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002727 bool result = false;
2728 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2729
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002730 if (thread_data -> td.td_deque == NULL ) {
2731 // There's no queue in this thread, go find another one
2732 // We're guaranteed that at least one thread has a queue
2733 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2734 return result;
2735 }
2736
2737 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2738 {
2739 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2740 return result;
2741 }
2742
2743 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2744
2745 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2746 {
2747 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2748 goto release_and_exit;
2749 }
2750
2751 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2752 // Wrap index.
2753 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2754 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2755
2756 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00002757 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002758
2759release_and_exit:
2760 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2761
2762 return result;
2763}
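/*
 * Illustrative sketch of the check / lock / re-check idiom used by __kmp_give_task above
 * (and by __kmp_push_task): test the unlocked counter first so a full queue is rejected
 * cheaply, then repeat the test under the lock because another thread may have filled the
 * queue in between. Guarded out, never compiled; the toy_* names are hypothetical and a
 * pthread mutex stands in for the deque's bootstrap lock.
 */
#if 0
#include <pthread.h>
#include <stdbool.h>

#define TOY_DEQUE_SIZE 256                      // must be a power of two
#define TOY_DEQUE_MASK (TOY_DEQUE_SIZE - 1)

typedef struct toy_deque {
    void           *slots[TOY_DEQUE_SIZE];
    int             head, tail, ntasks;         // head is popped/stolen from, tail is pushed to
    pthread_mutex_t lock;
} toy_deque_t;

static bool toy_give(toy_deque_t *dq, void *task)
{
    if (dq->ntasks >= TOY_DEQUE_SIZE)           // cheap unlocked check: obviously full, give up
        return false;

    pthread_mutex_lock(&dq->lock);
    if (dq->ntasks >= TOY_DEQUE_SIZE) {         // re-check under the lock: it may have filled up
        pthread_mutex_unlock(&dq->lock);
        return false;
    }
    dq->slots[dq->tail] = task;
    dq->tail = (dq->tail + 1) & TOY_DEQUE_MASK; // wrap the index, as TASK_DEQUE_MASK does
    dq->ntasks++;
    pthread_mutex_unlock(&dq->lock);
    return true;
}
#endif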
2764
2765
2766/* The finish of a proxy task is divided into two pieces:
2767 - the top half is the part that can be done from a thread outside the team
2768 - the bottom half must be run from a thread within the team
2769
2770 In order to run the bottom half, the task gets queued back into one of the threads of the team.
2771 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2772 So the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
2773 - things that can be run before queuing the bottom half
2774 - things that must be run after queuing the bottom half
2775
2776 This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this,
2777 we use the td_incomplete_child_tasks counter of the proxy task itself to synchronize the top and bottom halves.
2778*/
2779
2780static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2781{
2782 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2783 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2784 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2785 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2786
2787 taskdata -> td_flags.complete = 1; // mark the task as completed
2788
2789 if ( taskdata->td_taskgroup )
2790 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2791
2792    // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2793 TCR_4(taskdata->td_incomplete_child_tasks++);
2794}
2795
2796static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2797{
2798 kmp_int32 children = 0;
2799
2800 // Predecrement simulated by "- 1" calculation
2801 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2802 KMP_DEBUG_ASSERT( children >= 0 );
2803
2804    // Remove the imaginary child
2805 TCR_4(taskdata->td_incomplete_child_tasks--);
2806}
2807
2808static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2809{
2810 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2811 kmp_info_t * thread = __kmp_threads[ gtid ];
2812
2813 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2814 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2815
2816 // We need to wait to make sure the top half is finished
2817 // Spinning here should be ok as this should happen quickly
2818 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2819
2820 __kmp_release_deps(gtid,taskdata);
2821 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2822}
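/*
 * Illustrative sketch of the "imaginary child" handshake used by the three proxy-finish
 * routines above: the first top half pins the task by bumping its own incomplete-children
 * counter, the second top half unpins it, and the bottom half spins until the count drops
 * to zero before it frees anything. Guarded out, never compiled; the toy_* names are
 * hypothetical and C11 atomics stand in for the KMP_TEST_THEN_* / TCR / TCW macros.
 */
#if 0
#include <stdatomic.h>

typedef struct toy_proxy { atomic_int incomplete_children; } toy_proxy_t;

static void toy_first_top_half(toy_proxy_t *p)   // run before queuing the bottom half
{
    atomic_fetch_add(&p->incomplete_children, 1);   // pin: bottom half must not free the task yet
}

static void toy_second_top_half(toy_proxy_t *p)  // run after queuing the bottom half
{
    atomic_fetch_sub(&p->incomplete_children, 1);   // unpin: the bottom half may now finish
}

static void toy_bottom_half(toy_proxy_t *p)      // run by a thread inside the team
{
    while (atomic_load(&p->incomplete_children) > 0)
        ;                                           // short spin: wait for the second top half
    /* ... release dependences and free the task here ... */
}
#endif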
2823
2824/*!
2825@ingroup TASKING
2826@param gtid Global Thread ID of encountering thread
2827@param ptask Task whose execution is completed
2828
2829Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
2830*/
2831void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2832{
2833 KMP_DEBUG_ASSERT( ptask != NULL );
2834 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2835 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2836
2837 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2838
2839 __kmp_first_top_half_finish_proxy(taskdata);
2840 __kmp_second_top_half_finish_proxy(taskdata);
2841 __kmp_bottom_half_finish_proxy(gtid,ptask);
2842
2843 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2844}
2845
2846/*!
2847@ingroup TASKING
2848@param ptask Task whose execution is completed
2849
2850Execute the completion of a proxy task from a thread that does not belong to the team.
2851*/
2852void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2853{
2854 KMP_DEBUG_ASSERT( ptask != NULL );
2855 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2856
2857 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2858
2859 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2860
2861 __kmp_first_top_half_finish_proxy(taskdata);
2862
Jonathan Peytonff684e42016-02-11 22:58:29 +00002863    // Enqueue the task so its bottom half can be completed from a thread within the corresponding team
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002864 kmp_team_t * team = taskdata->td_team;
2865 kmp_int32 nthreads = team->t.t_nproc;
2866 kmp_info_t *thread;
2867 kmp_int32 k = 0;
2868
2869 do {
Jonathan Peyton1406f012015-05-22 22:35:51 +00002870        // This should be similar to k = __kmp_get_random( thread ) % nthreads, but we cannot use __kmp_get_random here
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002871        // For now we just try threads linearly until one accepts the task
2872 k = (k+1) % nthreads;
2873 thread = team->t.t_threads[k];
2874 } while ( !__kmp_give_task( thread, k, ptask ) );
2875
2876 __kmp_second_top_half_finish_proxy(taskdata);
2877
2878 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2879}
2880
Jonathan Peyton283a2152016-03-02 22:47:51 +00002881//---------------------------------------------------------------------------------
2882// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2883//
2884// thread: allocating thread
2885// task_src: pointer to source task to be duplicated
2886// returns: a pointer to the allocated kmp_task_t structure (task).
2887kmp_task_t *
2888__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2889{
2890 kmp_task_t *task;
2891 kmp_taskdata_t *taskdata;
2892 kmp_taskdata_t *taskdata_src;
2893 kmp_taskdata_t *parent_task = thread->th.th_current_task;
2894 size_t shareds_offset;
2895 size_t task_size;
2896
2897 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2898 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2899    KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be a proxy task
2900 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2901 task_size = taskdata_src->td_size_alloc;
2902
2903 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
2904 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2905 #if USE_FAST_MEMORY
2906 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2907 #else
2908 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2909 #endif /* USE_FAST_MEMORY */
2910 KMP_MEMCPY(taskdata, taskdata_src, task_size);
2911
2912 task = KMP_TASKDATA_TO_TASK(taskdata);
2913
2914 // Initialize new task (only specific fields not affected by memcpy)
2915 taskdata->td_task_id = KMP_GEN_TASK_ID();
2916    if( task->shareds != NULL ) { // need to set up the shareds pointer
2917 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2918 task->shareds = &((char*)taskdata)[shareds_offset];
2919 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2920 }
2921 taskdata->td_alloc_thread = thread;
2922 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2923
2924 // Only need to keep track of child task counts if team parallel and tasking not serialized
2925 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2926 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2927 if ( parent_task->td_taskgroup )
2928 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
2929 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
2930 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2931 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2932 }
2933
2934 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2935 thread, taskdata, taskdata->td_parent) );
2936#if OMPT_SUPPORT
2937 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2938#endif
2939 return task;
2940}
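/*
 * Illustrative sketch of the interior-pointer fix-up performed by __kmp_task_dup_alloc
 * above: after memcpy'ing a block that contains a pointer back into itself (shareds),
 * the pointer is recomputed as new base + original offset so it refers to the copy
 * rather than the source. Guarded out, never compiled; the toy_* names are hypothetical.
 */
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct toy_task { void *shareds; char payload[64]; } toy_task_t;

static toy_task_t *toy_dup(const toy_task_t *src, size_t total_size)
{
    toy_task_t *dst = (toy_task_t *)malloc(total_size);
    memcpy(dst, src, total_size);
    if (src->shareds != NULL) {
        // shareds points somewhere inside the source block; re-base it onto the copy
        size_t off = (size_t)((const char *)src->shareds - (const char *)src);
        dst->shareds = (char *)dst + off;
    }
    return dst;
}
#endif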
2941
2942// Routine optionally generated by the compiler for setting the lastprivate flag
2943// and calling needed constructors for private/firstprivate objects
2944// (used to form taskloop tasks from pattern task)
2945typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
2946
2947//---------------------------------------------------------------------------------
2948// __kmp_taskloop_linear: Start tasks of the taskloop linearly
2949//
2950// loc Source location information
2951// gtid Global thread ID
2952// task Task with whole loop iteration range
2953// lb Pointer to loop lower bound
2954// ub Pointer to loop upper bound
2955// st Loop stride
2956// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
2957// grainsize Schedule value if specified
2958// task_dup Tasks duplication routine
2959void
2960__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
2961 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
2962 int sched, kmp_uint64 grainsize, void *task_dup )
2963{
2964 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
2965 kmp_uint64 tc;
2966 kmp_uint64 lower = *lb; // compiler provides global bounds here
2967 kmp_uint64 upper = *ub;
2968 kmp_uint64 i, num_tasks, extras;
2969 kmp_info_t *thread = __kmp_threads[gtid];
2970 kmp_taskdata_t *current_task = thread->th.th_current_task;
2971 kmp_task_t *next_task;
2972 kmp_int32 lastpriv = 0;
2973 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
2974 size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
2975
2976 // compute trip count
2977 if ( st == 1 ) { // most common case
2978 tc = upper - lower + 1;
2979 } else if ( st < 0 ) {
2980 tc = (lower - upper) / (-st) + 1;
2981 } else { // st > 0
2982 tc = (upper - lower) / st + 1;
2983 }
2984 if(tc == 0) {
2985 // free the pattern task and exit
2986 __kmp_task_start( gtid, task, current_task );
2987 // do not execute anything for zero-trip loop
2988 __kmp_task_finish( gtid, task, current_task );
2989 return;
2990 }
2991
2992 // compute num_tasks/grainsize based on the input provided
2993 switch( sched ) {
2994 case 0: // no schedule clause specified, we can choose the default
2995 // let's try to schedule (team_size*10) tasks
2996        grainsize = thread->th.th_team_nproc * 10; // no break: fall through to the num_tasks case below
2997 case 2: // num_tasks provided
2998 if( grainsize > tc ) {
2999 num_tasks = tc; // too big num_tasks requested, adjust values
3000 grainsize = 1;
3001 extras = 0;
3002 } else {
3003 num_tasks = grainsize;
3004 grainsize = tc / num_tasks;
3005 extras = tc % num_tasks;
3006 }
3007 break;
3008 case 1: // grainsize provided
3009 if( grainsize > tc ) {
3010 num_tasks = 1; // too big grainsize requested, adjust values
3011 grainsize = tc;
3012 extras = 0;
3013 } else {
3014 num_tasks = tc / grainsize;
3015 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3016 extras = tc % num_tasks;
3017 }
3018 break;
3019 default:
3020 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3021 }
3022 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3023 KMP_DEBUG_ASSERT(num_tasks > extras);
3024 KMP_DEBUG_ASSERT(num_tasks > 0);
3025
3026 // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3027 for( i = 0; i < num_tasks; ++i ) {
3028 kmp_uint64 chunk_minus_1;
3029 if( extras == 0 ) {
3030 chunk_minus_1 = grainsize - 1;
3031 } else {
3032 chunk_minus_1 = grainsize;
3033            --extras; // the first 'extras' tasks get a bigger chunk (grainsize+1)
3034 }
3035 upper = lower + st * chunk_minus_1;
3036 if( i == num_tasks - 1 ) {
3037 // schedule the last task, set lastprivate flag
3038 lastpriv = 1;
3039#if KMP_DEBUG
3040 if( st == 1 )
3041 KMP_DEBUG_ASSERT(upper == *ub);
3042 else if( st > 0 )
3043 KMP_DEBUG_ASSERT(upper+st > *ub);
3044 else
3045 KMP_DEBUG_ASSERT(upper+st < *ub);
3046#endif
3047 }
3048 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3049 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3050 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3051 if( ptask_dup != NULL )
3052        ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3053 __kmp_omp_task(gtid, next_task, true); // schedule new task
3054 lower = upper + st; // adjust lower bound for the next iteration
3055 }
3056 // free the pattern task and exit
3057 __kmp_task_start( gtid, task, current_task );
3058 // do not execute the pattern task, just do bookkeeping
3059 __kmp_task_finish( gtid, task, current_task );
3060}
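/*
 * Worked example of the splitting arithmetic in __kmp_taskloop_linear above, for the
 * grainsize case (sched == 1): with tc = 10 iterations and grainsize = 3, num_tasks
 * becomes 10/3 = 3, grainsize is re-balanced to 10/3 = 3 and extras = 10 % 3 = 1, so the
 * generated chunks are 4, 3 and 3 iterations (the first 'extras' tasks get one extra
 * iteration) and tc == num_tasks * grainsize + extras holds. The guarded helper below
 * just restates that arithmetic; the toy_* names are hypothetical and it is never compiled.
 */
#if 0
typedef unsigned long long toy_u64;

static toy_u64 toy_split_by_grainsize(toy_u64 tc, toy_u64 grainsize,
                                      toy_u64 *out_grainsize, toy_u64 *out_extras)
{
    toy_u64 num_tasks;
    if (grainsize > tc) {           // grainsize larger than the trip count: one task gets it all
        num_tasks      = 1;
        *out_grainsize = tc;
        *out_extras    = 0;
    } else {
        num_tasks      = tc / grainsize;
        *out_grainsize = tc / num_tasks;   // re-balance for an even distribution
        *out_extras    = tc % num_tasks;   // the first 'extras' tasks get one extra iteration
    }
    return num_tasks;               // e.g. tc=10, grainsize=3 -> 3 tasks of 4, 3, 3 iterations
}
#endif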
3061
3062/*!
3063@ingroup TASKING
3064@param loc Source location information
3065@param gtid Global thread ID
3066@param task Task structure
3067@param if_val Value of the if clause
3068@param lb Pointer to loop lower bound
3069@param ub Pointer to loop upper bound
3070@param st Loop stride
3071@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3072@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3073@param grainsize Schedule value if specified
3074@param task_dup Tasks duplication routine
3075
3076Execute the taskloop construct.
3077*/
3078void
3079__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3080 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3081 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3082{
3083 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3084 KMP_DEBUG_ASSERT( task != NULL );
3085
3086 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3087 gtid, taskdata, *lb, *ub, st, grainsize, sched));
3088
3089    // check the value of the if clause first
3090 if( if_val == 0 ) { // if(0) specified, mark task as serial
3091 taskdata->td_flags.task_serial = 1;
3092 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3093 }
3094 if( nogroup == 0 ) {
3095 __kmpc_taskgroup( loc, gtid );
3096 }
3097
3098 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3099 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3100 }
3101
3102 if( nogroup == 0 ) {
3103 __kmpc_end_taskgroup( loc, gtid );
3104 }
3105 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3106}
3107
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003108#endif