/*
 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#include "tsan_annotations.h"

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_45_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

#ifdef BUILD_TIED_TASK_STACK

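//---------------------------------------------------------------------------
// Overview of the tied-task stack used by the routines below (a sketch based
// on the code in this #ifdef block): suspended tied tasks are kept in a
// per-thread stack of kmp_taskdata_t pointers.  The stack is a chain of
// kmp_stack_block_t blocks, each holding TASK_STACK_BLOCK_SIZE entries and
// linked through sb_prev/sb_next; the first block lives inside the
// kmp_task_stack_t itself (ts_first_block).  ts_top points at the next free
// slot, ts_entries counts pushed tasks, and (ts_entries & TASK_STACK_INDEX_MASK)
// detects block boundaries so push/pop know when to move to a neighboring block.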
//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//     from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top);

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//    after a thread_data structure is created.
//    It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}


//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block -> sb_prev   = stack_block;
            new_block -> sb_next   = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top);

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

//---------------------------------------------------
//  __kmp_push_task: Add a task to the thread's deque
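//
//  Implementation notes (sketch of the mechanism used below): each thread owns
//  a deque in task_team->tt.tt_threads_data[tid].  The deque is a ring buffer
//  of TASK_DEQUE_SIZE kmp_taskdata_t pointers indexed by td_deque_head and
//  td_deque_tail; both indices wrap by masking with TASK_DEQUE_MASK, and
//  td_deque_ntasks tracks occupancy.  Only the owning thread pushes, so the
//  deque can be allocated lazily without a lock, but the push itself takes
//  td_deque_lock because other threads may concurrently remove tasks from it.
//  With OMP_45_ENABLED, proxy tasks can also arrive from a thread outside of
//  OpenMP, which is why the "full" check is repeated after the lock is taken.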

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( ! KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_45_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_head, thread_data->td.td_deque_tail) );

    __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );

    return TASK_SUCCESSFULLY_PUSHED;
}


//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}


//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // current task of the thread is a parent of the new just created implicit tasks of new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}


//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}


//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.
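//
// Note: the "_if0" entry point is used for undeferred tasks -- typically when
// the task's if clause evaluates to false -- so the task body is executed
// immediately by the encountering thread; td_flags.task_serial is forced to 1
// below instead of the task being pushed onto a deque.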

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED


//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    ANNOTATE_HAPPENS_BEFORE(taskdata);
    // deallocate the taskdata and shared variable blocks associated with this task
    #if USE_FAST_MEMORY
        __kmp_fast_free( thread, taskdata );
    #else /* ! USE_FAST_MEMORY */
        __kmp_thread_free( thread, taskdata );
    #endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
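//
// Reference-counting sketch (based on the counters used below and in
// __kmp_task_alloc): td_allocated_child_tasks starts at 1 for the task itself
// and is incremented for each child task allocated while it runs.  Each call
// here decrements the count of the task and then of its ancestors in turn; a
// task's storage is released only once its own count reaches zero, and the
// walk stops at an implicit task or when tasking is serialized.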

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
#if OMP_45_ENABLED
    // Proxy tasks must always be allowed to free their parents
    // because they can be run in background even in serial mode.
    kmp_int32 team_serial = ( taskdata->td_flags.team_serial ||
                              taskdata->td_flags.tasking_ser ) && !taskdata->td_flags.proxy;
#else
    kmp_int32 team_serial = taskdata->td_flags.team_serial ||
                            taskdata->td_flags.tasking_ser;
#endif
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    kmp_int32 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_serial || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}

//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed. (may be NULL if task is serialized)
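//
// Untied-task note (protocol visible in this file): td_untied_count is bumped
// each time an untied task is pushed or started and decremented here when a
// task region finishes.  If the count is still positive, the task has merely
// reached a scheduling point and may be continued later, possibly by another
// thread, so only the current-task bookkeeping is restored and the taskdata
// is left alive; the full completion path below runs only when the count
// drops to zero.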

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_task_team_t * task_team = thread->th.th_task_team;  // might be NULL for serial teams...
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to check the counter so that the task structure is not freed prematurely
        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
        KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
                       gtid, counter, taskdata ) );
        if ( counter > 0 ) {
            // untied task is not done, to be continued possibly by other thread, do not free it now
            if (resumed_task == NULL) {
                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
            }
            thread->th.th_current_task = resumed_task;  // restore current_task
            resumed_task->td_flags.executing = 1;  // resume previous task
            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
                          gtid, taskdata, resumed_task) );
            return;
        }
    }

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;  // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
#if OMP_45_ENABLED
    }
    // if we found proxy tasks there could exist a dependency chain
    // with the proxy task as origin
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) || (task_team && task_team->tt.tt_found_proxy_tasks) ) {
#endif
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
    // Otherwise, if a task is executed immediately from the release_deps code
    // the flag will be reset to 1 again by this same function
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the tasks' destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else
#if OMP_45_ENABLED
        if ( !(task_team && task_team->tt.tt_found_proxy_tasks) )
#endif
        {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED


#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task. This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif


//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVs. This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref. Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id  = KMP_GEN_TASK_ID();
    task->td_team     = team;
//  task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident    = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_45_ENABLED
    task->td_flags.proxy = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete = 0;
    task->td_flags.freed = 0;

#if OMP_40_ENABLED
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // An implicit task does not have taskgroup
        task->td_dephash = NULL;
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}


//-----------------------------------------------------------------------------
// __kmp_finish_implicit_task: Release resources associated to implicit tasks
// at the end of parallel regions. Some resources are kept for reuse in the
// next parallel region.
//
// thread: thread data structure corresponding to implicit task

void
__kmp_finish_implicit_task(kmp_info_t *thread)
{
    kmp_taskdata_t *task = thread->th.th_current_task;
    if (task->td_dephash)
        __kmp_dephash_free_entries(thread, task->td_dephash);
}


//-----------------------------------------------------------------------------
// __kmp_free_implicit_task: Release resources associated to implicit tasks
// when these are destroyed.
//
// thread: thread data structure corresponding to implicit task

void
__kmp_free_implicit_task(kmp_info_t *thread)
{
    kmp_taskdata_t *task = thread->th.th_current_task;
    if (task->td_dephash)
        __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
}


// Round up a size to a multiple of val, where val is a power of two.
// Used to insert padding between structures co-allocated using a single malloc() call
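// Example (val = 8): a size of 38 has low bits set (38 & 7 == 6), so it is
// truncated to 32 and then bumped to 40; a size of 40 is already a multiple
// of 8 and is returned unchanged.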
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val


//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).
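//
// Memory layout of the single allocation performed below (one block per task):
//
//     [ kmp_taskdata_t | kmp_task_t + privates | padding | shareds ]
//       ^ taskdata       ^ task (KMP_TASKDATA_TO_TASK)    ^ task->shareds
//
// shareds_offset is sizeof(kmp_taskdata_t) + sizeof_kmp_task_t rounded up to
// pointer alignment via __kmp_round_up_to_val, so the shareds array of
// pointers starts on a pointer-aligned boundary.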

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
    #if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
    #else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
    #endif /* USE_FAST_MEMORY */
    ANNOTATE_HAPPENS_AFTER(taskdata);

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;      // AC: Always start with 0 part id

    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
    taskdata->td_level = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_untied_count = 0;
    taskdata->td_ident = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_45_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness   = flags->tiedness;
    taskdata->td_flags.final      = flags->final;
    taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
    taskdata->td_flags.proxy = flags->proxy;
    taskdata->td_task_team   = thread->th.th_task_team;
    taskdata->td_size_alloc  = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started   = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.complete  = 0;
    taskdata->td_flags.freed     = 0;

    taskdata->td_flags.native = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );
    ANNOTATE_HAPPENS_BEFORE(task);

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}


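//---------------------------------------------------------------------------------
// __kmpc_omp_task_alloc: compiler-visible wrapper around __kmp_task_alloc; it
// reinterprets the kmp_int32 flags word as kmp_tasking_flags_t, clears the
// "native" flag, and forwards the remaining arguments unchanged.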
kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}

1170//-----------------------------------------------------------
1171// __kmp_invoke_task: invoke the specified task
1172//
1173// gtid: global thread ID of caller
1174// task: the task to invoke
1175// current_task: the task to resume after task invokation
1176
1177static void
1178__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1179{
1180 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001181 kmp_uint64 cur_time;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001182#if OMP_40_ENABLED
1183 int discard = 0 /* false */;
1184#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001185 KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1186 gtid, taskdata, current_task) );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00001187 KMP_DEBUG_ASSERT(task);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001188#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001189 if ( taskdata->td_flags.proxy == TASK_PROXY &&
1190 taskdata->td_flags.complete == 1)
1191 {
1192 // This is a proxy task that was already completed but it needs to run
1193 // its bottom-half finish
1194 KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1195 gtid, taskdata) );
1196
1197 __kmp_bottom_half_finish_proxy(gtid,task);
1198
1199 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1200
1201 return;
1202 }
1203#endif
1204
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001205#if USE_ITT_BUILD && USE_ITT_NOTIFY
1206 if(__kmp_forkjoin_frames_mode == 3) {
1207 // Get the current time stamp to measure task execution time to correct barrier imbalance time
1208 cur_time = __itt_get_timestamp();
1209 }
1210#endif
1211
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001212#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001213 // Proxy tasks are not handled by the runtime
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001214 if ( taskdata->td_flags.proxy != TASK_PROXY ) {
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001215#endif
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001216 ANNOTATE_HAPPENS_AFTER(task);
1217 __kmp_task_start( gtid, task, current_task );
1218#if OMP_45_ENABLED
1219 }
1220#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001221
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001222#if OMPT_SUPPORT
1223 ompt_thread_info_t oldInfo;
1224 kmp_info_t * thread;
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001225 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001226        // Store the thread's OMPT state and restore it after the task
1227 thread = __kmp_threads[ gtid ];
1228 oldInfo = thread->th.ompt_thread_info;
1229 thread->th.ompt_thread_info.wait_id = 0;
1230 thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1231 taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1232 }
1233#endif
1234
Jim Cownie181b4bb2013-12-23 17:28:57 +00001235#if OMP_40_ENABLED
1236 // TODO: cancel tasks if the parallel region has also been cancelled
1237 // TODO: check if this sequence can be hoisted above __kmp_task_start
1238 // if cancellation has been enabled for this run ...
1239 if (__kmp_omp_cancellation) {
1240 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1241 kmp_team_t * this_team = this_thr->th.th_team;
1242 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1243 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
Jonathan Peyton45be4502015-08-11 21:36:41 +00001244 KMP_COUNT_BLOCK(TASK_cancelled);
Jim Cownie181b4bb2013-12-23 17:28:57 +00001245            // cancellation was requested for this task's taskgroup or the enclosing parallel region; discard the task
1246 discard = 1 /* true */;
1247 }
1248 }
1249
Jim Cownie5e8470a2013-09-27 10:38:44 +00001250 //
1251 // Invoke the task routine and pass in relevant data.
1252 // Thunks generated by gcc take a different argument list.
1253 //
Jim Cownie181b4bb2013-12-23 17:28:57 +00001254 if (!discard) {
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001255#if KMP_STATS_ENABLED
Jonathan Peyton45be4502015-08-11 21:36:41 +00001256 KMP_COUNT_BLOCK(TASK_executed);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001257 switch(KMP_GET_THREAD_STATE()) {
1258 case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
1259 case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
1260 case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
1261 case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
1262 case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
1263 default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
1264 }
1265#endif // KMP_STATS_ENABLED
Jim Cownie181b4bb2013-12-23 17:28:57 +00001266#endif // OMP_40_ENABLED
Jonathan Peytonadee8c52015-11-11 17:49:50 +00001267
1268#if OMPT_SUPPORT && OMPT_TRACE
1269 /* let OMPT know that we're about to run this task */
1270 if (ompt_enabled &&
1271 ompt_callbacks.ompt_callback(ompt_event_task_switch))
1272 {
1273 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1274 current_task->ompt_task_info.task_id,
1275 taskdata->ompt_task_info.task_id);
1276 }
1277#endif
1278
Jim Cownie5e8470a2013-09-27 10:38:44 +00001279#ifdef KMP_GOMP_COMPAT
Jim Cownie181b4bb2013-12-23 17:28:57 +00001280 if (taskdata->td_flags.native) {
1281 ((void (*)(void *))(*(task->routine)))(task->shareds);
1282 }
1283 else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001284#endif /* KMP_GOMP_COMPAT */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001285 {
1286 (*(task->routine))(gtid, task);
1287 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001288 KMP_POP_PARTITIONED_TIMER();
Jonathan Peytonadee8c52015-11-11 17:49:50 +00001289
1290#if OMPT_SUPPORT && OMPT_TRACE
1291        /* let OMPT know that we're returning to the caller (resumed) task */
1292 if (ompt_enabled &&
1293 ompt_callbacks.ompt_callback(ompt_event_task_switch))
1294 {
1295 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1296 taskdata->ompt_task_info.task_id,
1297 current_task->ompt_task_info.task_id);
1298 }
1299#endif
1300
Jim Cownie181b4bb2013-12-23 17:28:57 +00001301#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001302 }
Jim Cownie181b4bb2013-12-23 17:28:57 +00001303#endif // OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001304
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001305
1306#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001307 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001308 thread->th.ompt_thread_info = oldInfo;
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001309 taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001310 }
1311#endif
1312
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001313#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001314 // Proxy tasks are not handled by the runtime
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001315 if ( taskdata->td_flags.proxy != TASK_PROXY ) {
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001316#endif
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001317 ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
1318 __kmp_task_finish( gtid, task, current_task );
1319#if OMP_45_ENABLED
1320 }
1321#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001322
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001323#if USE_ITT_BUILD && USE_ITT_NOTIFY
1324 // Barrier imbalance - correct arrive time after the task finished
1325 if(__kmp_forkjoin_frames_mode == 3) {
1326 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1327 if(this_thr->th.th_bar_arrive_time) {
1328 this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1329 }
1330 }
1331#endif
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001332 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001333 gtid, taskdata, current_task) );
1334 return;
1335}
1336
1337//-----------------------------------------------------------------------
1338// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1339//
1340// loc_ref: location of original task pragma (ignored)
1341// gtid: Global Thread ID of encountering thread
1342// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1343// Returns:
1344// TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
1345// TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1346
1347kmp_int32
1348__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1349{
1350 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1351
1352 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1353 gtid, loc_ref, new_taskdata ) );
1354
1355 /* Should we execute the new task or queue it? For now, let's just always try to
1356 queue it. If the queue fills up, then we'll execute it. */
1357
1358 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1359 { // Execute this task immediately
1360 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1361 new_taskdata->td_flags.task_serial = 1;
1362 __kmp_invoke_task( gtid, new_task, current_task );
1363 }
1364
1365 KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1366 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1367 new_taskdata ) );
1368
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001369 ANNOTATE_HAPPENS_BEFORE(new_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001370 return TASK_CURRENT_NOT_QUEUED;
1371}
1372
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001373//---------------------------------------------------------------------
1374// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1375// gtid: Global Thread ID of encountering thread
1376// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1377// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1378// returns:
1379//
1380// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1381// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1382kmp_int32
1383__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1384{
1385 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1386
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001387#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001388 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001389 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001390 __builtin_frame_address(1);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001391 }
1392#endif
1393
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001394 /* Should we execute the new task or queue it? For now, let's just always try to
1395 queue it. If the queue fills up, then we'll execute it. */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001396#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001397 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1398#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001399 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001400#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001401 { // Execute this task immediately
1402 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1403 if ( serialize_immediate )
1404 new_taskdata -> td_flags.task_serial = 1;
1405 __kmp_invoke_task( gtid, new_task, current_task );
1406 }
1407
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001408#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001409 if (ompt_enabled) {
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001410 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001411 }
1412#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001413
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001414 ANNOTATE_HAPPENS_BEFORE(new_task);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001415 return TASK_CURRENT_NOT_QUEUED;
1416}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001417
1418//---------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001419// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1420// the parent thread only!
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421// loc_ref: location of original task pragma (ignored)
1422// gtid: Global Thread ID of encountering thread
1423// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1424// returns:
1425//
1426// TASK_CURRENT_NOT_QUEUED (0) if the current task was not suspended and queued to be resumed later.
1427// TASK_CURRENT_QUEUED (1) if the current task was suspended and queued to be resumed later.
1428
1429kmp_int32
1430__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1431{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001432 kmp_int32 res;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001433 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001434
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001435#if KMP_DEBUG
1436 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1437#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001438 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1439 gtid, loc_ref, new_taskdata ) );
1440
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001441 res = __kmp_omp_task(gtid,new_task,true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001442
1443 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1444 gtid, loc_ref, new_taskdata ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001445 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001446}
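
// Illustrative sketch (not part of the runtime): the call sequence a compiler
// typically emits for a plain "#pragma omp task" using the entry points above.
// The names loc, gtid, flags, sizeof_task, sizeof_shareds and task_entry are
// placeholders for compiler-generated values, not symbols defined in this file.
//
//   kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, flags, sizeof_task,
//                                          sizeof_shareds, task_entry );
//   /* ... copy firstprivate values / shared pointers into t->shareds ... */
//   __kmpc_omp_task( loc, gtid, t );   // queue the task, or run it immediately
//                                      // if it cannot be deferred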
1447
Jim Cownie5e8470a2013-09-27 10:38:44 +00001448//-------------------------------------------------------------------------------------
1449// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1450
1451kmp_int32
1452__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1453{
1454 kmp_taskdata_t * taskdata;
1455 kmp_info_t * thread;
1456 int thread_finished = FALSE;
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001457 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001458
Jonathan Peyton54127982015-11-04 21:37:48 +00001459 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001460
1461 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1462 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1463
1464 thread = __kmp_threads[ gtid ];
1465 taskdata = thread -> th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001466
1467#if OMPT_SUPPORT && OMPT_TRACE
1468 ompt_task_id_t my_task_id;
1469 ompt_parallel_id_t my_parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001470
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001471 if (ompt_enabled) {
1472 kmp_team_t *team = thread->th.th_team;
1473 my_task_id = taskdata->ompt_task_info.task_id;
1474 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001475
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001476 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001477 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1478 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1479 my_parallel_id, my_task_id);
1480 }
1481 }
1482#endif
1483
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001484        // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001485#if USE_ITT_BUILD
1486 // Note: These values are used by ITT events as well.
1487#endif /* USE_ITT_BUILD */
1488 taskdata->td_taskwait_counter += 1;
1489 taskdata->td_taskwait_ident = loc_ref;
1490 taskdata->td_taskwait_thread = gtid + 1;
1491
1492#if USE_ITT_BUILD
1493 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1494 if ( itt_sync_obj != NULL )
1495 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1496#endif /* USE_ITT_BUILD */
1497
Andrey Churbanovdd313b02016-11-01 08:33:36 +00001498 bool must_wait = ! taskdata->td_flags.team_serial && ! taskdata->td_flags.final;
1499
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001500#if OMP_45_ENABLED
Andrey Churbanovdd313b02016-11-01 08:33:36 +00001501 must_wait = must_wait || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001502#endif
Andrey Churbanovdd313b02016-11-01 08:33:36 +00001503 if (must_wait)
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001504 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001505 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001506 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001507 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1508 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001509 }
1510 }
1511#if USE_ITT_BUILD
1512 if ( itt_sync_obj != NULL )
1513 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1514#endif /* USE_ITT_BUILD */
1515
1516 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001517 // Debugger: The taskwait is completed. Location remains, but thread is negated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001518 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001519
1520#if OMPT_SUPPORT && OMPT_TRACE
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001521 if (ompt_enabled) {
1522 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1523 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001524 my_parallel_id, my_task_id);
Jonas Hahnfeld867aa202016-02-12 12:19:59 +00001525 }
Jonas Hahnfeldfd0614d2016-09-14 13:59:13 +00001526 taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001527 }
1528#endif
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001529 ANNOTATE_HAPPENS_AFTER(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001530 }
1531
1532 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1533 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1534
1535 return TASK_CURRENT_NOT_QUEUED;
1536}
1537
1538
1539//-------------------------------------------------
1540// __kmpc_omp_taskyield: switch to a different task
1541
1542kmp_int32
1543__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1544{
1545 kmp_taskdata_t * taskdata;
1546 kmp_info_t * thread;
1547 int thread_finished = FALSE;
1548
Jonathan Peyton45be4502015-08-11 21:36:41 +00001549 KMP_COUNT_BLOCK(OMP_TASKYIELD);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001550 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001551
Jim Cownie5e8470a2013-09-27 10:38:44 +00001552 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1553 gtid, loc_ref, end_part) );
1554
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001555 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001556 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1557
1558 thread = __kmp_threads[ gtid ];
1559 taskdata = thread -> th.th_current_task;
1560 // Should we model this as a task wait or not?
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001561        // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001562#if USE_ITT_BUILD
1563 // Note: These values are used by ITT events as well.
1564#endif /* USE_ITT_BUILD */
1565 taskdata->td_taskwait_counter += 1;
1566 taskdata->td_taskwait_ident = loc_ref;
1567 taskdata->td_taskwait_thread = gtid + 1;
1568
1569#if USE_ITT_BUILD
1570 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1571 if ( itt_sync_obj != NULL )
1572 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1573#endif /* USE_ITT_BUILD */
1574 if ( ! taskdata->td_flags.team_serial ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001575 kmp_task_team_t * task_team = thread->th.th_task_team;
1576 if (task_team != NULL) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001577 if (KMP_TASKING_ENABLED(task_team)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001578 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1579 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1580 }
1581 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001582 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001583#if USE_ITT_BUILD
1584 if ( itt_sync_obj != NULL )
1585 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1586#endif /* USE_ITT_BUILD */
1587
1588 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
Jonathan Peyton8c61c592016-06-21 15:59:34 +00001589 // Debugger: The taskwait is completed. Location remains, but thread is negated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001590 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1591 }
1592
1593 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1594 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1595
1596 return TASK_CURRENT_NOT_QUEUED;
1597}
1598
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001599// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
1600#if OMP_45_ENABLED
1601//
1602// Task Reduction implementation
1603//
1604
1605typedef struct kmp_task_red_flags {
1606 unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects)
1607 unsigned reserved31 : 31;
1608} kmp_task_red_flags_t;
1609
1610// internal structure for reduction data item related info
1611typedef struct kmp_task_red_data {
1612 void *reduce_shar; // shared reduction item
1613 size_t reduce_size; // size of data item
1614 void *reduce_priv; // thread specific data
1615 void *reduce_pend; // end of private data for comparison op
1616 void *reduce_init; // data initialization routine
1617 void *reduce_fini; // data finalization routine
1618 void *reduce_comb; // data combiner routine
1619 kmp_task_red_flags_t flags; // flags for additional info from compiler
1620} kmp_task_red_data_t;
1621
1622// structure sent us by compiler - one per reduction item
1623typedef struct kmp_task_red_input {
1624 void *reduce_shar; // shared reduction item
1625 size_t reduce_size; // size of data item
1626 void *reduce_init; // data initialization routine
1627 void *reduce_fini; // data finalization routine
1628 void *reduce_comb; // data combiner routine
1629 kmp_task_red_flags_t flags; // flags for additional info from compiler
1630} kmp_task_red_input_t;
1631
1632/*!
1633@ingroup TASKING
1634@param gtid Global thread ID
1635@param num Number of data items to reduce
1636@param data Array of data for reduction
1637@return The taskgroup identifier
1638
1639Initialize task reduction for the taskgroup.
1640*/
1641void*
1642__kmpc_task_reduction_init(int gtid, int num, void *data)
1643{
1644 kmp_info_t * thread = __kmp_threads[gtid];
1645 kmp_taskgroup_t * tg = thread->th.th_current_task->td_taskgroup;
1646 kmp_int32 nth = thread->th.th_team_nproc;
1647 kmp_task_red_input_t *input = (kmp_task_red_input_t*)data;
1648 kmp_task_red_data_t *arr;
1649
1650 // check input data just in case
1651 KMP_ASSERT(tg != NULL);
1652 KMP_ASSERT(data != NULL);
1653 KMP_ASSERT(num > 0);
1654 if (nth == 1) {
1655 KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
1656 gtid, tg));
1657 return (void*)tg;
1658 }
1659 KA_TRACE(10,("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
1660 gtid, tg, num));
1661 arr = (kmp_task_red_data_t*)__kmp_thread_malloc(thread, num * sizeof(kmp_task_red_data_t));
1662 for (int i = 0; i < num; ++i) {
1663 void(*f_init)(void*) = (void(*)(void*))(input[i].reduce_init);
1664 size_t size = input[i].reduce_size - 1;
1665 // round the size up to cache line per thread-specific item
1666 size += CACHE_LINE - size % CACHE_LINE;
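        // Worked example (illustrative, assuming CACHE_LINE == 64): a reduce_size of
        // 1..64 becomes 64 and 65..128 becomes 128, so each thread-specific copy
        // starts on its own cache line and false sharing between threads is avoided.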
1667 KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
1668 arr[i].reduce_shar = input[i].reduce_shar;
1669 arr[i].reduce_size = size;
1670 arr[i].reduce_init = input[i].reduce_init;
1671 arr[i].reduce_fini = input[i].reduce_fini;
1672 arr[i].reduce_comb = input[i].reduce_comb;
1673 arr[i].flags = input[i].flags;
1674 if (!input[i].flags.lazy_priv) {
1675 // allocate cache-line aligned block and fill it with zeros
1676 arr[i].reduce_priv = __kmp_allocate(nth * size);
1677 arr[i].reduce_pend = (char*)(arr[i].reduce_priv) + nth * size;
1678 if (f_init != NULL) {
1679 // initialize thread-specific items
1680 for (int j = 0; j < nth; ++j) {
1681 f_init((char*)(arr[i].reduce_priv) + j * size);
1682 }
1683 }
1684 } else {
1685 // only allocate space for pointers now,
1686 // objects will be lazily allocated/initialized once requested
1687 arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void*));
1688 }
1689 }
1690 tg->reduce_data = (void*)arr;
1691 tg->reduce_num_data = num;
1692 return (void*)tg;
1693}
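
// Illustrative sketch (not part of the runtime): roughly how a compiler-generated
// caller could drive __kmpc_task_reduction_init for a single "+" reduction over a
// long. Only kmp_task_red_input_t and the entry point above come from this file;
// red_init, red_comb, my_sum and gtid are hypothetical caller-side names.
//
//   static void red_init( void *p )              { *(long *)p = 0; }
//   static void red_comb( void *lhs, void *rhs ) { *(long *)lhs += *(long *)rhs; }
//
//   long my_sum = 0;                              // shared reduction item
//   kmp_task_red_input_t in;
//   in.reduce_shar     = &my_sum;
//   in.reduce_size     = sizeof(my_sum);
//   in.reduce_init     = (void *)red_init;
//   in.reduce_fini     = NULL;                    // nothing to finalize for a long
//   in.reduce_comb     = (void *)red_comb;
//   in.flags.lazy_priv = 0;                       // eager per-thread allocation
//   void *tg = __kmpc_task_reduction_init( gtid, 1, &in );
//
// The per-thread copies are combined back into my_sum by __kmp_task_reduction_fini
// when the enclosing taskgroup ends.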
1694
1695/*!
1696@ingroup TASKING
1697@param gtid Global thread ID
1698@param tskgrp The taskgroup ID (optional)
1699@param data Shared location of the item
1700@return The pointer to per-thread data
1701
1702Get thread-specific location of data item
1703*/
1704void*
1705__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data)
1706{
1707 kmp_info_t * thread = __kmp_threads[gtid];
1708 kmp_int32 nth = thread->th.th_team_nproc;
1709 if (nth == 1)
1710 return data; // nothing to do
1711
1712 kmp_taskgroup_t *tg = (kmp_taskgroup_t*)tskgrp;
1713 if (tg == NULL)
1714 tg = thread->th.th_current_task->td_taskgroup;
1715 KMP_ASSERT(tg != NULL);
1716 kmp_task_red_data_t *arr = (kmp_task_red_data_t*)(tg->reduce_data);
1717 kmp_int32 num = tg->reduce_num_data;
1718 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1719
1720 KMP_ASSERT(data != NULL);
1721 while (tg != NULL) {
1722 for (int i = 0; i < num; ++i) {
1723 if (!arr[i].flags.lazy_priv) {
1724 if (data == arr[i].reduce_shar ||
1725 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
1726 return (char*)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
1727 } else {
1728 // check shared location first
1729 void **p_priv = (void**)(arr[i].reduce_priv);
1730 if (data == arr[i].reduce_shar)
1731 goto found;
1732 // check if we get some thread specific location as parameter
1733 for (int j = 0; j < nth; ++j)
1734 if (data == p_priv[j])
1735 goto found;
1736 continue; // not found, continue search
1737 found:
1738 if (p_priv[tid] == NULL) {
1739 // allocate thread specific object lazily
1740 void(*f_init)(void*) = (void(*)(void*))(arr[i].reduce_init);
1741 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
1742 if (f_init != NULL) {
1743 f_init(p_priv[tid]);
1744 }
1745 }
1746 return p_priv[tid];
1747 }
1748 }
1749 tg = tg->parent;
1750 arr = (kmp_task_red_data_t*)(tg->reduce_data);
1751 num = tg->reduce_num_data;
1752 }
1753 KMP_ASSERT2(0, "Unknown task reduction item");
1754 return NULL; // ERROR, this line never executed
1755}
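
// Illustrative sketch (hypothetical, continuing the example above): inside a
// participating task the compiler-generated body would fetch its private copy
// and accumulate into it, e.g.
//
//   long *priv = (long *)__kmpc_task_reduction_get_th_data( gtid, tg, &my_sum );
//   *priv += local_contribution;   // local_contribution is a placeholder
//
// Passing tskgrp == NULL falls back to the current task's taskgroup, and in the
// lazy_priv case an already thread-specific pointer is also recognized by the
// search loop above.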
1756
1757// Finalize task reduction.
1758// Called from __kmpc_end_taskgroup()
1759static void
1760__kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg)
1761{
1762 kmp_int32 nth = th->th.th_team_nproc;
1763 KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
1764 kmp_task_red_data_t *arr = (kmp_task_red_data_t*)tg->reduce_data;
1765 kmp_int32 num = tg->reduce_num_data;
1766 for (int i = 0; i < num; ++i) {
1767 void *sh_data = arr[i].reduce_shar;
1768 void(*f_fini)(void*) = (void(*)(void*))(arr[i].reduce_fini);
1769 void(*f_comb)(void*,void*) = (void(*)(void*,void*))(arr[i].reduce_comb);
1770 if (!arr[i].flags.lazy_priv) {
1771 void *pr_data = arr[i].reduce_priv;
1772 size_t size = arr[i].reduce_size;
1773 for (int j = 0; j < nth; ++j) {
1774 void * priv_data = (char*)pr_data + j * size;
1775 f_comb(sh_data, priv_data); // combine results
1776 if (f_fini)
1777 f_fini(priv_data); // finalize if needed
1778 }
1779 } else {
1780 void **pr_data = (void**)(arr[i].reduce_priv);
1781 for (int j = 0; j < nth; ++j) {
1782 if (pr_data[j] != NULL) {
1783 f_comb(sh_data, pr_data[j]); // combine results
1784 if (f_fini)
1785 f_fini(pr_data[j]); // finalize if needed
1786 __kmp_free(pr_data[j]);
1787 }
1788 }
1789 }
1790 __kmp_free(arr[i].reduce_priv);
1791 }
1792 __kmp_thread_free(th, arr);
1793 tg->reduce_data = NULL;
1794 tg->reduce_num_data = 0;
1795}
1796#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001797
1798#if OMP_40_ENABLED
1799//-------------------------------------------------------------------------------------
1800// __kmpc_taskgroup: Start a new taskgroup
1801
1802void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001803__kmpc_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001804{
1805 kmp_info_t * thread = __kmp_threads[ gtid ];
1806 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1807 kmp_taskgroup_t * tg_new =
1808 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1809 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1810 tg_new->count = 0;
Jim Cownie181b4bb2013-12-23 17:28:57 +00001811 tg_new->cancel_request = cancel_noreq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812 tg_new->parent = taskdata->td_taskgroup;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001813// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
1814#if OMP_45_ENABLED
1815 tg_new->reduce_data = NULL;
1816 tg_new->reduce_num_data = 0;
1817#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001818 taskdata->td_taskgroup = tg_new;
1819}
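
// Illustrative sketch (hypothetical, not compiler-mandated): the bracketing a
// compiler typically emits for "#pragma omp taskgroup { ... }":
//
//   __kmpc_taskgroup( loc, gtid );
//   /* ... child tasks created here are counted against the new taskgroup ... */
//   __kmpc_end_taskgroup( loc, gtid );   // waits until that count drops to zero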
1820
1821
1822//-------------------------------------------------------------------------------------
1823// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1824// and its descendants are complete
1825
1826void
Jim Cownie181b4bb2013-12-23 17:28:57 +00001827__kmpc_end_taskgroup( ident_t* loc, int gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828{
1829 kmp_info_t * thread = __kmp_threads[ gtid ];
1830 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1831 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1832 int thread_finished = FALSE;
1833
1834 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1835 KMP_DEBUG_ASSERT( taskgroup != NULL );
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001836 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001837
1838 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1839#if USE_ITT_BUILD
1840 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1841 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1842 if ( itt_sync_obj != NULL )
1843 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1844#endif /* USE_ITT_BUILD */
1845
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001846#if OMP_45_ENABLED
Jonathan Peyton61118492016-05-20 19:03:38 +00001847 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001848#else
Jonathan Peyton61118492016-05-20 19:03:38 +00001849 if ( ! taskdata->td_flags.team_serial )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001850#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00001851 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001852 kmp_flag_32 flag(&(taskgroup->count), 0U);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001853 while ( TCR_4(taskgroup->count) != 0 ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001854 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1855 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001856 }
1857 }
1858
1859#if USE_ITT_BUILD
1860 if ( itt_sync_obj != NULL )
1861 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1862#endif /* USE_ITT_BUILD */
1863 }
1864 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1865
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001866// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
1867#if OMP_45_ENABLED
1868 if( taskgroup->reduce_data != NULL ) // need to reduce?
1869 __kmp_task_reduction_fini(thread, taskgroup);
1870#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001871 // Restore parent taskgroup for the current task
1872 taskdata->td_taskgroup = taskgroup->parent;
1873 __kmp_thread_free( thread, taskgroup );
1874
1875 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001876 ANNOTATE_HAPPENS_AFTER(taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001877}
1878#endif
1879
1880
1881//------------------------------------------------------
1882// __kmp_remove_my_task: remove a task from my own deque
1883
1884static kmp_task_t *
1885__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1886 kmp_int32 is_constrained )
1887{
1888 kmp_task_t * task;
1889 kmp_taskdata_t * taskdata;
1890 kmp_thread_data_t *thread_data;
1891 kmp_uint32 tail;
1892
1893 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1894 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1895
1896 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1897
1898 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1899 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1900 thread_data->td.td_deque_tail) );
1901
1902 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1903 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1904 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1905 thread_data->td.td_deque_tail) );
1906 return NULL;
1907 }
1908
1909 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1910
1911 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1912 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1913 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1914 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1915 thread_data->td.td_deque_tail) );
1916 return NULL;
1917 }
1918
Jonathan Peytonf4f96952016-05-31 19:07:00 +00001919 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920 taskdata = thread_data -> td.td_deque[ tail ];
1921
Jonathan Peyton8cb45c82016-06-13 17:51:59 +00001922 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001923 // we need to check if the candidate obeys task scheduling constraint:
1924 // only child of current task can be scheduled
1925 kmp_taskdata_t * current = thread->th.th_current_task;
1926 kmp_int32 level = current->td_level;
1927 kmp_taskdata_t * parent = taskdata->td_parent;
1928 while ( parent != current && parent->td_level > level ) {
1929 parent = parent->td_parent; // check generation up to the level of the current task
1930 KMP_DEBUG_ASSERT(parent != NULL);
1931 }
1932 if ( parent != current ) {
Jonathan Peytonb6f0f522016-06-09 18:51:17 +00001933 // If the tail task is not a child, then no other child can appear in the deque.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001934 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1935 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1936 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1937 thread_data->td.td_deque_tail) );
1938 return NULL;
1939 }
1940 }
1941
1942 thread_data -> td.td_deque_tail = tail;
1943 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1944
1945 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1946
1947 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1948 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1949 thread_data->td.td_deque_tail) );
1950
1951 task = KMP_TASKDATA_TO_TASK( taskdata );
1952 return task;
1953}
1954
1955
1956//-----------------------------------------------------------
1957// __kmp_steal_task: remove a task from another thread's deque
1958// Assume that calling thread has already checked existence of
1959// task_team thread_data before calling this routine.
1960
1961static kmp_task_t *
1962__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1963 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1964 kmp_int32 is_constrained )
1965{
1966 kmp_task_t * task;
1967 kmp_taskdata_t * taskdata;
1968 kmp_thread_data_t *victim_td, *threads_data;
Jonathan Peyton7c4d66d2015-06-08 20:01:14 +00001969 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970
1971 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1972
1973 threads_data = task_team -> tt.tt_threads_data;
1974 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1975
1976 victim_tid = victim->th.th_info.ds.ds_tid;
1977 victim_td = & threads_data[ victim_tid ];
1978
1979 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1980 "head=%u tail=%u\n",
1981 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1982 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1983
1984 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1985 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1986 {
1987 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1988 "ntasks=%d head=%u tail=%u\n",
1989 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1990 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1991 return NULL;
1992 }
1993
1994 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1995
1996 // Check again after we acquire the lock
1997 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1998 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1999 {
2000 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
2001 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
2002 "ntasks=%d head=%u tail=%u\n",
2003 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
2004 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
2005 return NULL;
2006 }
2007
2008 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
2009
Andrey Churbanov753fa042016-11-02 16:45:25 +00002010 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2011 if ( is_constrained ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012 // we need to check if the candidate obeys task scheduling constraint:
Andrey Churbanov753fa042016-11-02 16:45:25 +00002013 // only descendant of current task can be scheduled
Jim Cownie5e8470a2013-09-27 10:38:44 +00002014 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
2015 kmp_int32 level = current->td_level;
2016 kmp_taskdata_t * parent = taskdata->td_parent;
2017 while ( parent != current && parent->td_level > level ) {
2018 parent = parent->td_parent; // check generation up to the level of the current task
2019 KMP_DEBUG_ASSERT(parent != NULL);
2020 }
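        // Worked example (illustrative): if the current task sits at td_level 2 and the
        // candidate's ancestor chain runs level 4 -> level 3 -> current (level 2), the walk
        // stops with parent == current and the steal is allowed; if it instead reaches some
        // other task at level <= 2, the candidate is not a descendant and is left alone.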
Andrey Churbanov51107e02016-11-01 16:19:04 +00002021 if ( parent != current ) {
Andrey Churbanov753fa042016-11-02 16:45:25 +00002022 // If the head task is not a descendant of the current task then do not
2023 // steal it. No other task in victim's deque can be a descendant of the
2024 // current task.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002025 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
2026 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
2027 "ntasks=%d head=%u tail=%u\n",
2028 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
2029 task_team, victim_td->td.td_deque_ntasks,
2030 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
2031 return NULL;
2032 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002033 }
Andrey Churbanov753fa042016-11-02 16:45:25 +00002034 // Bump head pointer and Wrap.
2035 victim_td->td.td_deque_head = (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036 if (*thread_finished) {
2037 // We need to un-mark this victim as a finished victim. This must be done before
2038 // releasing the lock, or else other threads (starting with the master victim)
2039 // might be prematurely released from the barrier!!!
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002040 kmp_uint32 count;
2041
2042 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002043
2044 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
2045 gtid, count + 1, task_team) );
2046
2047 *thread_finished = FALSE;
2048 }
2049 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
2050
2051 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
2052
Jonathan Peyton45be4502015-08-11 21:36:41 +00002053 KMP_COUNT_BLOCK(TASK_stolen);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002054 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00002055 "ntasks=%d head=%u tail=%u\n",
2056 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
2057 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2058 victim_td->td.td_deque_tail) );
2059
2060 task = KMP_TASKDATA_TO_TASK( taskdata );
2061 return task;
2062}
2063
2064
2065//-----------------------------------------------------------------------------
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002066// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00002067// is satisfied (return true) or there are none left (return false).
2068// final_spin is TRUE if this is the spin at the release barrier.
2069// thread_finished indicates whether the thread is finished executing all
2070// the tasks it has on its deque, and is at the release barrier.
2071// flag is the flag object encapsulating the location on which to spin.
2072// flag == NULL means only execute a single task and return.
2073// flag->done_check() gives the condition used to terminate the spin.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002074template <class C>
Jonathan Peyton61118492016-05-20 19:03:38 +00002075static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002076 int *thread_finished
2077 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078{
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002079 kmp_task_team_t * task_team = thread->th.th_task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002080 kmp_thread_data_t * threads_data;
2081 kmp_task_t * task;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002082 kmp_info_t * other_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002083 kmp_taskdata_t * current_task = thread -> th.th_current_task;
2084 volatile kmp_uint32 * unfinished_threads;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002085 kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002086
2087 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2088 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
2089
Jonathan Peyton54127982015-11-04 21:37:48 +00002090 if (task_team == NULL) return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002092 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002093 gtid, final_spin, *thread_finished) );
2094
Andrey Churbanov581490e2017-02-06 18:53:32 +00002095 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2097 KMP_DEBUG_ASSERT( threads_data != NULL );
2098
2099 nthreads = task_team -> tt.tt_nproc;
2100 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002101#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002102 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
2103#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104 KMP_DEBUG_ASSERT( nthreads > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002105#endif
Paul Osmialowski9cc353e2016-06-01 09:59:26 +00002106 KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002108 while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
2109 while (1) { // Inner loop to find a task and execute it
2110 task = NULL;
2111 if (use_own_tasks) { // check on own queue first
2112 task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002113 }
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002114 if ((task == NULL) && (nthreads > 1)) { // Steal a task
2115 int asleep = 1;
2116 use_own_tasks = 0;
2117 // Try to steal from the last place I stole from successfully.
2118 if (victim == -2) { // haven't stolen anything yet
2119 victim = threads_data[tid].td.td_deque_last_stolen;
2120 if (victim != -1) // if we have a last stolen from victim, get the thread
2121 other_thread = threads_data[victim].td.td_thr;
2122 }
2123 if (victim != -1) { // found last victim
2124 asleep = 0;
2125 }
2126 else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
2127 do { // Find a different thread to steal work from.
2128 // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
2129 // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
2130 victim = __kmp_get_random(thread) % (nthreads - 1);
2131 if (victim >= tid) {
2132 ++victim; // Adjusts random distribution to exclude self
2133 }
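                    // Illustrative example: with nthreads == 4 and tid == 2, the modulo yields
                    // 0..2 and bumping values >= 2 maps the choices to {0, 1, 3}, i.e. a
                    // uniform pick over every thread except ourselves.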
2134 // Found a potential victim
2135 other_thread = threads_data[victim].td.td_thr;
2136 // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
2137 // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
2138 // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
2139 // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
2140 // do not sleep at the separate tasking barrier, so this isn't a problem.
2141 asleep = 0;
2142 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2143 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2144 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
2145 asleep = 1;
2146 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
2147                        // A sleeping thread should not have any tasks on its queue. There is a slight
2148 // possibility that it resumes, steals a task from another thread, which spawns more
2149 // tasks, all in the time that it takes this thread to check => don't write an assertion
2150 // that the victim's queue is empty. Try stealing from a different thread.
2151 }
2152 } while (asleep);
2153 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002154
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002155 if (!asleep) {
2156 // We have a victim to try to steal from
2157 task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
2158 }
2159 if (task != NULL) { // set last stolen to victim
2160 if (threads_data[tid].td.td_deque_last_stolen != victim) {
2161 threads_data[tid].td.td_deque_last_stolen = victim;
2162                    // The pre-refactored code did not try more than 1 successful new victim,
2163 // unless the last one generated more local tasks; new_victim keeps track of this
2164 new_victim = 1;
2165 }
2166 }
2167 else { // No tasks found; unset last_stolen
2168 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
2169 victim = -2; // no successful victim found
2170 }
2171 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002172
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002173 if (task == NULL) // break out of tasking loop
2174 break;
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002175
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002176 // Found a task; execute it
Jim Cownie5e8470a2013-09-27 10:38:44 +00002177#if USE_ITT_BUILD && USE_ITT_NOTIFY
2178 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002179 if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
2180 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002181 }
2182 __kmp_itt_task_starting( itt_sync_obj );
2183 }
2184#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2185 __kmp_invoke_task( gtid, task, current_task );
2186#if USE_ITT_BUILD
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002187 if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002188#endif /* USE_ITT_BUILD */
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002189 // If this thread is only partway through the barrier and the condition is met, then return now,
2190 // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
2191 // in the barrier, waiting to be released, we know that the termination condition will not be
2192            // satisfied, so don't waste any cycles checking it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002193 if (flag == NULL || (!final_spin && flag->done_check())) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002194 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002195 return TRUE;
2196 }
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002197 if (thread->th.th_task_team == NULL) {
2198 break;
2199 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002200 KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002201 // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
2202 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
2203 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
2204 use_own_tasks = 1;
2205 new_victim = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002206 }
2207 }
2208
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002209 // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002210#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002211 // The work queue may be empty but there might be proxy tasks still executing
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002212 if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002213#else
Jonathan Peyton61118492016-05-20 19:03:38 +00002214 if (final_spin)
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002215#endif
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002216 {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002217 // First, decrement the #unfinished threads, if that has not already been done. This decrement
2218 // might be to the spin location, and result in the termination condition being satisfied.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002219 if (! *thread_finished) {
Jonathan Peytone8104ad2015-06-08 18:56:33 +00002220 kmp_uint32 count;
2221
2222 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002223 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224 gtid, count, task_team) );
2225 *thread_finished = TRUE;
2226 }
2227
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002228 // It is now unsafe to reference thread->th.th_team !!!
2229 // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
2230 // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002231 // If we can steal more work, we know that this has not happened yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002232 if (flag != NULL && flag->done_check()) {
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002233 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002234 return TRUE;
2235 }
2236 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002237
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002238 // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
2239 if (thread->th.th_task_team == NULL) {
2240 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
2241 return FALSE;
2242 }
2243
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002244#if OMP_45_ENABLED
Jonathan Peytonc4c722a2016-06-09 18:27:03 +00002245 // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
2246 // tasks from own queue
2247 if (nthreads == 1)
2248 use_own_tasks = 1;
2249 else
2250#endif
2251 {
2252 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
2253 return FALSE;
2254 }
2255 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002256}
2257
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002258int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
2259 int *thread_finished
2260 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2261{
2262 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2263 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2264}
2265
2266int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2267 int *thread_finished
2268 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2269{
2270 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2271 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2272}
2273
2274int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2275 int *thread_finished
2276 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2277{
2278 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2279 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2280}
2281
2282
Jim Cownie5e8470a2013-09-27 10:38:44 +00002283
2284//-----------------------------------------------------------------------------
2285// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2286// next barrier so they can assist in executing enqueued tasks.
2287// First thread in allocates the task team atomically.
2288
2289static void
2290__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2291{
Jim Cownie5e8470a2013-09-27 10:38:44 +00002292 kmp_thread_data_t *threads_data;
2293 int nthreads, i, is_init_thread;
2294
2295 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2296 __kmp_gtid_from_thread( this_thr ) ) );
2297
2298 KMP_DEBUG_ASSERT(task_team != NULL);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002299 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002300
2301 nthreads = task_team->tt.tt_nproc;
2302 KMP_DEBUG_ASSERT(nthreads > 0);
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00002303 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002304
2305 // Allocate or increase the size of threads_data if necessary
2306 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2307
2308 if (!is_init_thread) {
2309 // Some other thread already set up the array.
2310 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2311 __kmp_gtid_from_thread( this_thr ) ) );
2312 return;
2313 }
2314 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2315 KMP_DEBUG_ASSERT( threads_data != NULL );
2316
2317 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2318 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2319 {
2320 // Release any threads sleeping at the barrier, so that they can steal
2321 // tasks and execute them. In extra barrier mode, tasks do not sleep
2322 // at the separate tasking barrier, so this isn't a problem.
2323 for (i = 0; i < nthreads; i++) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002324 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002325 kmp_info_t *thread = threads_data[i].td.td_thr;
2326
2327 if (i == this_thr->th.th_info.ds.ds_tid) {
2328 continue;
2329 }
2330 // Since we haven't locked the thread's suspend mutex lock at this
2331 // point, there is a small window where a thread might be putting
2332 // itself to sleep, but hasn't set the th_sleep_loc field yet.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002333            // To work around this, __kmp_execute_tasks_template() periodically checks to
Jim Cownie5e8470a2013-09-27 10:38:44 +00002334 // see if other threads are sleeping (using the same random
2335 // mechanism that is used for task stealing) and awakens them if
2336 // they are.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002337 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002338 {
2339 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2340 __kmp_gtid_from_thread( this_thr ),
2341 __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002342 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343 }
2344 else {
2345 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2346 __kmp_gtid_from_thread( this_thr ),
2347 __kmp_gtid_from_thread( thread ) ) );
2348 }
2349 }
2350 }
2351
2352 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2353 __kmp_gtid_from_thread( this_thr ) ) );
2354}
2355
2356
2357/* ------------------------------------------------------------------------ */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002358/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002359 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2360 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2361 * After a child thread checks into a barrier and calls __kmp_release() from
2362 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2363 * longer assume that the kmp_team_t structure is intact (at any moment, the
2364 * master thread may exit the barrier code and free the team data structure,
2365 * and return the threads to the thread pool).
2366 *
2367 * This does not work with the tasking code, as the thread is still
2368 * expected to participate in the execution of any tasks that may have been
2369 * spawned by a member of the team, and the thread still needs access to
2370 * each of the other threads in the team, so that it can steal work from them.
2371 *
2372 * Enter the existence of the kmp_task_team_t struct. It employs a reference
2373 * counting mechanism, and is allocated by the master thread before calling
2374 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2375 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2376 * of the kmp_task_team_t structs for consecutive barriers can overlap
2377 * (and will, unless the master thread is the last thread to exit the barrier
2378 * release phase, which is not typical).
2379 *
2380 * The existence of such a struct is useful outside the context of tasking,
2381 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2382 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2383 * libraries.
2384 *
2385 * We currently use the existence of the threads array as an indicator that
2386 * tasks were spawned since the last barrier. If the structure is to be
2387 * useful outside the context of tasking, then this will have to change, but
2388 * not setting the field minimizes the performance impact of tasking on
2389 * barriers, when no explicit tasks were spawned (pushed, actually).
2390 */
2391
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002392
Jim Cownie5e8470a2013-09-27 10:38:44 +00002393static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2394// Lock for task team data structures
2395static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2396
2397
2398//------------------------------------------------------------------------------
2399// __kmp_alloc_task_deque:
2400// Allocates a task deque for a particular thread, and initializes the necessary
2401// data structures relating to the deque. This only happens once per thread
2402// per task team since task teams are recycled.
2403// No lock is needed during allocation since each thread allocates its own
2404// deque.
2405
2406static void
2407__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2408{
2409 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2410 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2411
2412 // Initialize last stolen task field to "none"
2413 thread_data -> td.td_deque_last_stolen = -1;
2414
2415 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2416 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2417 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2418
2419 KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002420 __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002421 // Allocate space for task deque, and zero the deque
2422 // Cannot use __kmp_thread_calloc() because threads not around for
2423 // kmp_reap_task_team( ).
2424 thread_data -> td.td_deque = (kmp_taskdata_t **)
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002425 __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2426 thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002427}
2428
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002429//------------------------------------------------------------------------------
2430// __kmp_realloc_task_deque:
2431// Re-allocates a task deque for a particular thread, copies the content from the old deque
2432// and adjusts the necessary data structures relating to the deque.
2433// This operation must be done with the deque_lock held
2434
2435static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2436{
2437 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2438 kmp_int32 new_size = 2 * size;
2439
2440 KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
2441 __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
2442
2443 kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
2444
2445 int i,j;
2446 for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
2447 new_deque[j] = thread_data->td.td_deque[i];
2448
2449 __kmp_free(thread_data->td.td_deque);
2450
2451 thread_data -> td.td_deque_head = 0;
2452 thread_data -> td.td_deque_tail = size;
2453 thread_data -> td.td_deque = new_deque;
2454 thread_data -> td.td_deque_size = new_size;
2455}
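// The reallocation above relies on the deque capacity being a power of two:
// entries are copied out in logical order starting at td_deque_head, the new
// head becomes 0, and the new tail becomes the old capacity (the deque is only
// grown when full, so that equals the element count). A minimal, self-contained
// sketch of the same circular-buffer growth, kept out of the build with #if 0;
// the mini_deque type and field names are invented for this illustration.
#if 0
#include <cstdlib>

struct mini_deque {
    void **items;
    int    size;    // capacity, always a power of two
    int    head;    // index of the oldest element
    int    tail;    // index one past the newest element
    int    ntasks;  // number of elements currently stored
};

static void mini_deque_grow(mini_deque *d) {
    int    new_size  = 2 * d->size;
    void **new_items = (void **) malloc(new_size * sizeof(void *));
    // Copy in logical order, unwrapping the circular layout via the size mask.
    for (int i = d->head, j = 0; j < d->ntasks; i = (i + 1) & (d->size - 1), j++)
        new_items[j] = d->items[i];
    free(d->items);
    d->items = new_items;
    d->head  = 0;
    d->tail  = d->ntasks;  // first free slot in the freshly unwrapped buffer
    d->size  = new_size;
}
#endif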
Jim Cownie5e8470a2013-09-27 10:38:44 +00002456
2457//------------------------------------------------------------------------------
2458// __kmp_free_task_deque:
2459// Deallocates a task deque for a particular thread.
2460// Happens at library deallocation, so there is no need to reset all thread data fields.
2461
2462static void
2463__kmp_free_task_deque( kmp_thread_data_t *thread_data )
2464{
2465 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2466
2467 if ( thread_data -> td.td_deque != NULL ) {
2468 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2469 __kmp_free( thread_data -> td.td_deque );
2470 thread_data -> td.td_deque = NULL;
2471 }
2472 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2473
2474#ifdef BUILD_TIED_TASK_STACK
2475 // GEH: Figure out what to do here for td_susp_tied_tasks
2476 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2477 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2478 }
2479#endif // BUILD_TIED_TASK_STACK
2480}
2481
2482
2483//------------------------------------------------------------------------------
2484// __kmp_realloc_task_threads_data:
2485// Allocates a threads_data array for a task team, either by allocating an initial
2486// array or enlarging an existing array. Only the first thread to get the lock
2487// allocs or enlarges the array and re-initializes the array elements.
2488// That thread returns "TRUE", the rest return "FALSE".
2489// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2490// The current size is given by task_team -> tt.tt_max_threads.
2491
2492static int
2493__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2494{
2495 kmp_thread_data_t ** threads_data_p;
2496 kmp_int32 nthreads, maxthreads;
2497 int is_init_thread = FALSE;
2498
2499 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2500 // Already reallocated and initialized.
2501 return FALSE;
2502 }
2503
2504 threads_data_p = & task_team -> tt.tt_threads_data;
2505 nthreads = task_team -> tt.tt_nproc;
2506 maxthreads = task_team -> tt.tt_max_threads;
2507
2508 // All threads must lock when they encounter the first task of the implicit task
2509 // region to make sure threads_data fields are (re)initialized before used.
2510 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2511
2512 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2513 // first thread to enable tasking
2514 kmp_team_t *team = thread -> th.th_team;
2515 int i;
2516
2517 is_init_thread = TRUE;
2518 if ( maxthreads < nthreads ) {
2519
2520 if ( *threads_data_p != NULL ) {
2521 kmp_thread_data_t *old_data = *threads_data_p;
2522 kmp_thread_data_t *new_data = NULL;
2523
2524 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2525 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2526 __kmp_gtid_from_thread( thread ), task_team,
2527 nthreads, maxthreads ) );
2528 // Reallocate threads_data to have more elements than current array
2529 // Cannot use __kmp_thread_realloc() because threads not around for
2530 // kmp_reap_task_team( ). Note all new array entries are initialized
2531 // to zero by __kmp_allocate().
2532 new_data = (kmp_thread_data_t *)
2533 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2534 // copy old data to new data
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002535 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002536 (void *) old_data,
2537 maxthreads * sizeof(kmp_thread_data_t) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538
2539#ifdef BUILD_TIED_TASK_STACK
2540 // GEH: Figure out if this is the right thing to do
2541 for (i = maxthreads; i < nthreads; i++) {
2542 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2543 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2544 }
2545#endif // BUILD_TIED_TASK_STACK
2546 // Install the new data and free the old data
2547 (*threads_data_p) = new_data;
2548 __kmp_free( old_data );
2549 }
2550 else {
2551 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2552 "threads data for task_team %p, size = %d\n",
2553 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2554 // Make the initial allocate for threads_data array, and zero entries
2555 // Cannot use __kmp_thread_calloc() because threads not around for
2556 // kmp_reap_task_team( ).
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00002557 ANNOTATE_IGNORE_WRITES_BEGIN();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002558 *threads_data_p = (kmp_thread_data_t *)
2559 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00002560 ANNOTATE_IGNORE_WRITES_END();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002561#ifdef BUILD_TIED_TASK_STACK
2562 // GEH: Figure out if this is the right thing to do
2563 for (i = 0; i < nthreads; i++) {
2564 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2565 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2566 }
2567#endif // BUILD_TIED_TASK_STACK
2568 }
2569 task_team -> tt.tt_max_threads = nthreads;
2570 }
2571 else {
2572 // If array has (more than) enough elements, go ahead and use it
2573 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2574 }
2575
2576 // initialize threads_data pointers back to thread_info structures
2577 for (i = 0; i < nthreads; i++) {
2578 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2579 thread_data -> td.td_thr = team -> t.t_threads[i];
2580
2581 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2582 // The last stolen field survives across teams / barrier, and the number
2583 // of threads may have changed. It's possible (likely?) that a new
2584 // parallel region will exhibit the same behavior as the previous region.
2585 thread_data -> td.td_deque_last_stolen = -1;
2586 }
2587 }
2588
2589 KMP_MB();
2590 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2591 }
2592
2593 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2594 return is_init_thread;
2595}
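// __kmp_realloc_task_threads_data() above follows a check / lock / re-check
// pattern: tt_found_tasks is read without the lock first, and only the first
// thread that acquires tt_threads_lock while the flag is still clear performs
// the (re)allocation and returns TRUE. A stripped-down sketch of that idiom,
// kept out of the build with #if 0; the names below are invented for this
// illustration and use C++11 atomics in place of the runtime's TCR/TCW macros.
#if 0
#include <atomic>
#include <mutex>

static std::atomic<bool> g_initialized{false};
static std::mutex        g_init_lock;

// Returns true only for the single caller that performed the initialization.
static bool init_shared_state_once() {
    if (g_initialized.load(std::memory_order_acquire))
        return false;                       // fast path: already set up
    std::lock_guard<std::mutex> guard(g_init_lock);
    if (g_initialized.load(std::memory_order_relaxed))
        return false;                       // another thread beat us to it
    // ... allocate and initialize the shared arrays here ...
    g_initialized.store(true, std::memory_order_release);
    return true;
}
#endif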
2596
2597
2598//------------------------------------------------------------------------------
2599// __kmp_free_task_threads_data:
2600// Deallocates a threads_data array for a task team, including any attached
2601// tasking deques. Only occurs at library shutdown.
2602
2603static void
2604__kmp_free_task_threads_data( kmp_task_team_t *task_team )
2605{
2606 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2607 if ( task_team -> tt.tt_threads_data != NULL ) {
2608 int i;
2609 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2610 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2611 }
2612 __kmp_free( task_team -> tt.tt_threads_data );
2613 task_team -> tt.tt_threads_data = NULL;
2614 }
2615 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2616}
2617
2618
2619//------------------------------------------------------------------------------
2620// __kmp_allocate_task_team:
2621// Allocates a task team associated with a specific team, taking it from
2622// the global task team free list if possible. Also initializes data structures.
2623
2624static kmp_task_team_t *
2625__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2626{
2627 kmp_task_team_t *task_team = NULL;
2628 int nthreads;
2629
2630 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2631 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2632
2633 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2634 // Take a task team from the task team pool
2635 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2636 if (__kmp_free_task_teams != NULL) {
2637 task_team = __kmp_free_task_teams;
2638 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2639 task_team -> tt.tt_next = NULL;
2640 }
2641 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2642 }
2643
2644 if (task_team == NULL) {
2645 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2646 "task team for team %p\n",
2647 __kmp_gtid_from_thread( thread ), team ) );
2648 // Allocate a new task team if one is not available.
2649 // Cannot use __kmp_thread_malloc() because threads not around for
2650 // kmp_reap_task_team( ).
2651 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2652 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2653 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2654 //task_team -> tt.tt_max_threads = 0;
2655 //task_team -> tt.tt_next = NULL;
2656 }
2657
2658 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002659#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002660 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2661#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002662 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2663
Jim Cownie5e8470a2013-09-27 10:38:44 +00002664 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2665 TCW_4( task_team -> tt.tt_active, TRUE );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002666
Jonathan Peyton54127982015-11-04 21:37:48 +00002667 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2668 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002669 return task_team;
2670}
2671
2672
2673//------------------------------------------------------------------------------
2674// __kmp_free_task_team:
2675// Frees the task team associated with a specific thread, and adds it
2676// to the global task team free list.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002677
Jonathan Peyton54127982015-11-04 21:37:48 +00002678void
Jim Cownie5e8470a2013-09-27 10:38:44 +00002679__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2680{
2681 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2682 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2683
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684 // Put task team back on free list
2685 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2686
2687 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2688 task_team -> tt.tt_next = __kmp_free_task_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002689 TCW_PTR(__kmp_free_task_teams, task_team);
2690
2691 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2692}
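// Task teams are recycled through a single global, singly linked free list
// (__kmp_free_task_teams) protected by __kmp_task_team_lock: allocation pops
// from the head, freeing pushes back onto the head. A minimal sketch of that
// pattern, kept out of the build with #if 0; the node type and names are
// invented for this illustration.
#if 0
#include <mutex>

struct node { node *next; /* ... payload ... */ };

static node      *g_free_list = nullptr;
static std::mutex g_free_list_lock;

static node *free_list_pop_or_null() {
    std::lock_guard<std::mutex> guard(g_free_list_lock);
    node *n = g_free_list;
    if (n != nullptr) {
        g_free_list = n->next;
        n->next = nullptr;  // detach before handing it out
    }
    return n;               // caller allocates a fresh node when this is NULL
}

static void free_list_push(node *n) {
    std::lock_guard<std::mutex> guard(g_free_list_lock);
    n->next = g_free_list;
    g_free_list = n;
}
#endif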
2693
2694
2695//------------------------------------------------------------------------------
2696// __kmp_reap_task_teams:
2697// Free all the task teams on the task team free list.
2698// Should only be done during library shutdown.
2699// Cannot do anything that needs a thread structure or gtid since they are already gone.
2700
2701void
2702__kmp_reap_task_teams( void )
2703{
2704 kmp_task_team_t *task_team;
2705
2706 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2707 // Free all task_teams on the free list
2708 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2709 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2710 __kmp_free_task_teams = task_team -> tt.tt_next;
2711 task_team -> tt.tt_next = NULL;
2712
2713 // Free threads_data if necessary
2714 if ( task_team -> tt.tt_threads_data != NULL ) {
2715 __kmp_free_task_threads_data( task_team );
2716 }
2717 __kmp_free( task_team );
2718 }
2719 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2720 }
2721}
2722
Jim Cownie5e8470a2013-09-27 10:38:44 +00002723//------------------------------------------------------------------------------
2724// __kmp_wait_to_unref_task_teams:
2725// Some threads could still be in the fork barrier release code, possibly
2726// trying to steal tasks. Wait for each thread to unreference its task team.
2727//
2728void
2729__kmp_wait_to_unref_task_teams(void)
2730{
2731 kmp_info_t *thread;
2732 kmp_uint32 spins;
2733 int done;
2734
2735 KMP_INIT_YIELD( spins );
2736
Jim Cownie5e8470a2013-09-27 10:38:44 +00002737 for (;;) {
2738 done = TRUE;
2739
2740 // TODO: GEH - this may be wrong because some sync would be necessary
2741 // in case threads are added to the pool during the traversal.
2742 // Need to verify that lock for thread pool is held when calling
2743 // this routine.
2744 for (thread = (kmp_info_t *)__kmp_thread_pool;
2745 thread != NULL;
2746 thread = thread->th.th_next_pool)
2747 {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002748#if KMP_OS_WINDOWS
2749 DWORD exit_val;
2750#endif
2751 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2752 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2753 __kmp_gtid_from_thread( thread ) ) );
2754 continue;
2755 }
2756#if KMP_OS_WINDOWS
2757 // TODO: GEH - add this check for Linux* OS / OS X* as well?
2758 if (!__kmp_is_thread_alive(thread, &exit_val)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002759 thread->th.th_task_team = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002760 continue;
2761 }
2762#endif
2763
2764 done = FALSE; // Because th_task_team pointer is not NULL for this thread
2765
2766 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2767 __kmp_gtid_from_thread( thread ) ) );
2768
2769 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002770 volatile void *sleep_loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002771 // If the thread is sleeping, awaken it.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002772 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002773 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2774 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002775 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002776 }
2777 }
2778 }
2779 if (done) {
2780 break;
2781 }
2782
2783 // If we are oversubscribed,
2784 // or have waited a bit (and library mode is throughput), yield.
2785 // Pause is in the following code.
2786 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2787 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2788 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002789}
2790
2791
2792//------------------------------------------------------------------------------
2793// __kmp_task_team_setup: Create a task_team for the current team, but use
2794// an already created, unused one if it already exists.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002795void
Jonathan Peyton54127982015-11-04 21:37:48 +00002796__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002797{
2798 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2799
Jonathan Peyton54127982015-11-04 21:37:48 +00002800 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2801 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
Jonathan Peyton61118492016-05-20 19:03:38 +00002802 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002803 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002804 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002805 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002806 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002807 }
Jonathan Peyton54127982015-11-04 21:37:48 +00002808
Jonathan Peyton61118492016-05-20 19:03:38 +00002809 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
Jonathan Peyton54127982015-11-04 21:37:48 +00002810 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2811 // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
Jonathan Peyton61118492016-05-20 19:03:38 +00002812 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
Jonathan Peyton54127982015-11-04 21:37:48 +00002813 // serialized teams.
Jonathan Peytone1dad192015-11-30 20:05:13 +00002814 if (team->t.t_nproc > 1) {
2815 int other_team = 1 - this_thr->th.th_task_state;
2816 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2817 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2818 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2819 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2820 ((team != NULL) ? team->t.t_id : -1), other_team ));
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002821 }
Jonathan Peytone1dad192015-11-30 20:05:13 +00002822 else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2823 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2824 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2825 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2826 TCW_4(task_team->tt.tt_found_tasks, FALSE);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002827#if OMP_45_ENABLED
Jonathan Peytone1dad192015-11-30 20:05:13 +00002828 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2829#endif
2830 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2831 TCW_4(task_team->tt.tt_active, TRUE );
2832 }
2833 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2834 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2835 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2836 ((team != NULL) ? team->t.t_id : -1), other_team ));
2837 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002838 }
2839}
2840
2841
2842//------------------------------------------------------------------------------
2843// __kmp_task_team_sync: Propagation of task team data from team to threads
2844// which happens just after the release phase of a team barrier. This may be
2845// called by any thread, but only for teams with # threads > 1.
2846
2847void
2848__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2849{
2850 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2851
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002852 // Toggle the th_task_state field, to switch which task_team this thread refers to
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002853 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002854 // It is now safe to propagate the task team pointer from the team struct to the current thread.
2855 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
Jonathan Peyton54127982015-11-04 21:37:48 +00002856 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002857 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2858 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002859}
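// Each team keeps two task-team slots (t_task_team[0] and t_task_team[1]) and
// each thread remembers which slot it is currently using in th_task_state; the
// toggle above flips that parity at every barrier so one slot can be drained
// and recycled while the other is set up for the next region. A compact model
// of the double buffering, kept out of the build with #if 0; the toy_* names
// are invented for this illustration.
#if 0
struct toy_task_team { int active; };

struct toy_team {
    toy_task_team *task_team[2];  // two slots whose lifetimes overlap
};

struct toy_thread {
    int            task_state;    // 0 or 1: which slot this thread uses
    toy_task_team *task_team;     // cached pointer for fast access
};

// Mirrors the toggle performed after the barrier release phase.
static void toy_task_team_sync(toy_thread *thr, toy_team *team) {
    thr->task_state = 1 - thr->task_state;               // flip parity
    thr->task_team  = team->task_team[thr->task_state];  // adopt the new slot
}
#endif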
2860
2861
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002862//--------------------------------------------------------------------------------------------
2863// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
Jonathan Peyton54127982015-11-04 21:37:48 +00002864// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2865// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2866// optionally as the last argument. When wait is zero, master thread does not wait for
2867// unfinished_threads to reach 0.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002868void
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002869__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
Jim Cownie181b4bb2013-12-23 17:28:57 +00002870 USE_ITT_BUILD_ARG(void * itt_sync_obj)
Jonathan Peyton54127982015-11-04 21:37:48 +00002871 , int wait)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002872{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002873 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002874
2875 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2876 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2877
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002878 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
Jonathan Peyton54127982015-11-04 21:37:48 +00002879 if (wait) {
2880 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2881 __kmp_gtid_from_thread(this_thr), task_team));
2882 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2883 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2884 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2885 flag.wait(this_thr, TRUE
2886 USE_ITT_BUILD_ARG(itt_sync_obj));
2887 }
2888 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2889 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2890 "setting active to false, setting local and team's pointer to NULL\n",
Jonathan Peytone03b62f2015-10-08 18:49:40 +00002891 __kmp_gtid_from_thread(this_thr), task_team));
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002892#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002893 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2894 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2895#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00002896 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002897#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002898 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2899 KMP_MB();
2900
2901 TCW_PTR(this_thr->th.th_task_team, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002902 }
2903}
2904
2905
2906//------------------------------------------------------------------------------
2907// __kmp_tasking_barrier:
Jonathan Peyton1bd61b42015-10-08 19:44:16 +00002908// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002909// Internal function to execute all tasks prior to a regular barrier or a
2910// join barrier. It is a full barrier itself, which unfortunately turns
2911// regular barriers into double barriers and join barriers into 1 1/2
2912// barriers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002913void
2914__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2915{
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002916 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002917 int flag = FALSE;
2918 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2919
2920#if USE_ITT_BUILD
2921 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2922#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002923 kmp_flag_32 spin_flag(spin, 0U);
2924 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2925 USE_ITT_BUILD_ARG(NULL), 0 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002926#if USE_ITT_BUILD
2927 // TODO: What about itt_sync_obj??
2928 KMP_FSYNC_SPIN_PREPARE( spin );
2929#endif /* USE_ITT_BUILD */
2930
2931 if( TCR_4(__kmp_global.g.g_done) ) {
2932 if( __kmp_global.g.g_abort )
2933 __kmp_abort_thread( );
2934 break;
2935 }
2936 KMP_YIELD( TRUE ); // GH: We always yield here
2937 }
2938#if USE_ITT_BUILD
2939 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2940#endif /* USE_ITT_BUILD */
2941}
2942
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002943
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002944#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002945
2946/* __kmp_give_task puts a task into a given thread's queue if:
Jonathan Peytonff684e42016-02-11 22:58:29 +00002947 - the queue for that thread was created
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002948 - there's space in that queue
2949
2950 Because of this, __kmp_push_task needs to check if there's space after getting the lock
2951 */
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002952static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002953{
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002954 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002955 kmp_task_team_t * task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002956
2957 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2958
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002959 // If task_team is NULL something went really bad...
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002960 KMP_DEBUG_ASSERT( task_team != NULL );
2961
Jonathan Peyton134f90d2016-02-11 23:07:30 +00002962 bool result = false;
2963 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2964
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002965 if (thread_data -> td.td_deque == NULL ) {
2966 // There's no queue in this thread, go find another one
2967 // We're guaranteed that at least one thread has a queue
2968 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2969 return result;
2970 }
2971
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002972 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002973 {
2974 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002975
2976 // if this deque is bigger than the pass ratio give a chance to another thread
2977 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
2978
2979 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2980 __kmp_realloc_task_deque(thread,thread_data);
2981
2982 } else {
2983
2984 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2985
2986 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2987 {
2988 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2989
2990 // if this deque is bigger than the pass ratio give a chance to another thread
2991 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
2992 goto release_and_exit;
2993
2994 __kmp_realloc_task_deque(thread,thread_data);
2995 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002996 }
2997
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002998 // lock is held here, and there is space in the deque
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002999
3000 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
3001 // Wrap index.
Jonathan Peytonf4f96952016-05-31 19:07:00 +00003002 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003003 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
3004
3005 result = true;
Jonathan Peyton1406f012015-05-22 22:35:51 +00003006 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003007
3008release_and_exit:
3009 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
3010
3011 return result;
3012}
3013
3014
3015/* The finish of a proxy task is divided into two pieces:
3016 - the top half is the one that can be done from a thread outside the team
3017 - the bottom half must be run from a thread within the team
3018
3019 In order to run the bottom half, the task gets queued back into one of the threads of the team.
3020 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
3021 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
3022 - things that can be run before queuing the bottom half
3023 - things that must be run after queuing the bottom half
3024
3025 This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
3026 we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
3027*/
3028
3029static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
3030{
3031 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
3032 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
3033 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
3034 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
3035
3036 taskdata -> td_flags.complete = 1; // mark the task as completed
3037
3038 if ( taskdata->td_taskgroup )
3039 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
3040
3041 // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
Paul Osmialowski52bef532016-05-07 00:00:00 +00003042 TCI_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003043}
3044
3045static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
3046{
3047 kmp_int32 children = 0;
3048
3049 // Predecrement simulated by "- 1" calculation
3050 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
3051 KMP_DEBUG_ASSERT( children >= 0 );
3052
3053 // Remove the imaginary child
Paul Osmialowski52bef532016-05-07 00:00:00 +00003054 TCD_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003055}
3056
3057static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
3058{
3059 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
3060 kmp_info_t * thread = __kmp_threads[ gtid ];
3061
3062 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
3063 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
3064
3065 // We need to wait to make sure the top half is finished
3066 // Spinning here should be ok as this should happen quickly
3067 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
3068
3069 __kmp_release_deps(gtid,taskdata);
3070 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
3071}
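// The three helpers above split proxy-task completion so that the bottom half
// can run on a different thread: the first top half marks the task complete and
// bumps the proxy's own td_incomplete_child_tasks as a guard, the second top
// half decrements the parent's counter and then drops the guard, and the bottom
// half spins until the guard reaches zero before releasing dependences and
// freeing the task. A condensed model of that handshake, kept out of the build
// with #if 0; std::atomic stands in for the runtime's counter macros and the
// toy_proxy type is invented for this illustration.
#if 0
#include <atomic>

struct toy_proxy {
    std::atomic<int> guard{0};
    bool             complete = false;
};

static void first_top_half(toy_proxy *p) {
    p->complete = true;
    p->guard.fetch_add(1);   // keep the bottom half from freeing the task early
}

static void second_top_half(toy_proxy *p) {
    p->guard.fetch_sub(1);   // bottom half may now finish and free the task
}

static void bottom_half(toy_proxy *p) {
    while (p->guard.load() > 0) { /* spin: wait for second_top_half() */ }
    // ... release dependences and free the task here ...
}
#endif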
3072
3073/*!
3074@ingroup TASKING
3075@param gtid Global Thread ID of encountering thread
3076@param ptask Task whose execution is completed
3077
3078Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
3079*/
3080void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
3081{
3082 KMP_DEBUG_ASSERT( ptask != NULL );
3083 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
3084 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
3085
3086 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
3087
3088 __kmp_first_top_half_finish_proxy(taskdata);
3089 __kmp_second_top_half_finish_proxy(taskdata);
3090 __kmp_bottom_half_finish_proxy(gtid,ptask);
3091
3092 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
3093}
3094
3095/*!
3096@ingroup TASKING
3097@param ptask Task whose execution is completed
3098
3099Execute the completion of a proxy task from a thread that need not belong to the team.
3100*/
3101void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
3102{
3103 KMP_DEBUG_ASSERT( ptask != NULL );
3104 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
3105
3106 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
3107
3108 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
3109
3110 __kmp_first_top_half_finish_proxy(taskdata);
3111
Jonathan Peytonff684e42016-02-11 22:58:29 +00003112 // Enqueue the task so that a thread within the corresponding team completes the bottom half
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003113 kmp_team_t * team = taskdata->td_team;
3114 kmp_int32 nthreads = team->t.t_nproc;
3115 kmp_info_t *thread;
Jonathan Peytonf4f96952016-05-31 19:07:00 +00003116
3117 //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
3118 kmp_int32 start_k = 0;
3119 kmp_int32 pass = 1;
3120 kmp_int32 k = start_k;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003121
3122 do {
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003123 //For now we're just linearly trying to find a thread
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003124 thread = team->t.t_threads[k];
Jonathan Peytonf4f96952016-05-31 19:07:00 +00003125 k = (k+1) % nthreads;
3126
3127 // we did a full pass through all the threads
3128 if ( k == start_k ) pass = pass << 1;
3129
3130 } while ( !__kmp_give_task( thread, k, ptask, pass ) );
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003131
3132 __kmp_second_top_half_finish_proxy(taskdata);
3133
3134 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
3135}
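// The do/while loop above walks the team round-robin and doubles "pass" after
// every full sweep; __kmp_give_task() uses pass as a bound on how far it will
// grow a full deque before refusing, so repeated failures gradually relax that
// bound until some thread accepts the bottom half. A simplified model of the
// retry policy, kept out of the build with #if 0; try_give() is a hypothetical
// stand-in for __kmp_give_task().
#if 0
static bool try_give(int tid, int pass);  // placement attempt, invented here

static void place_on_some_thread(int nthreads) {
    int start_k = 0;   // the runtime would prefer a pseudo-random start
    int pass    = 1;
    int k       = start_k;
    int target;
    do {
        target = k;
        k = (k + 1) % nthreads;
        if (k == start_k)
            pass <<= 1;  // a full sweep failed: tolerate larger deques next time
    } while (!try_give(target, pass));
}
#endif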
3136
Jonathan Peyton283a2152016-03-02 22:47:51 +00003137//---------------------------------------------------------------------------------
3138// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
3139//
3140// thread: allocating thread
3141// task_src: pointer to source task to be duplicated
3142// returns: a pointer to the allocated kmp_task_t structure (task).
3143kmp_task_t *
3144__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
3145{
3146 kmp_task_t *task;
3147 kmp_taskdata_t *taskdata;
3148 kmp_taskdata_t *taskdata_src;
3149 kmp_taskdata_t *parent_task = thread->th.th_current_task;
3150 size_t shareds_offset;
3151 size_t task_size;
3152
3153 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
3154 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
3155 KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
3156 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
3157 task_size = taskdata_src->td_size_alloc;
3158
3159 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
3160 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
3161 #if USE_FAST_MEMORY
3162 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
3163 #else
3164 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
3165 #endif /* USE_FAST_MEMORY */
3166 KMP_MEMCPY(taskdata, taskdata_src, task_size);
3167
3168 task = KMP_TASKDATA_TO_TASK(taskdata);
3169
3170 // Initialize new task (only specific fields not affected by memcpy)
3171 taskdata->td_task_id = KMP_GEN_TASK_ID();
3172 if( task->shareds != NULL ) { // need setup shareds pointer
3173 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
3174 task->shareds = &((char*)taskdata)[shareds_offset];
3175 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
3176 }
3177 taskdata->td_alloc_thread = thread;
3178 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
3179
3180 // Only need to keep track of child task counts if team parallel and tasking not serialized
3181 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
3182 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
3183 if ( parent_task->td_taskgroup )
3184 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
3185 // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
3186 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
3187 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
3188 }
3189
3190 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
3191 thread, taskdata, taskdata->td_parent) );
3192#if OMPT_SUPPORT
3193 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
3194#endif
3195 return task;
3196}
3197
3198// Routine optionally generated by the compiler for setting the lastprivate flag
3199// and calling needed constructors for private/firstprivate objects
3200// (used to form taskloop tasks from pattern task)
3201typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
3202
3203//---------------------------------------------------------------------------------
3204// __kmp_taskloop_linear: Start tasks of the taskloop linearly
3205//
3206// loc Source location information
3207// gtid Global thread ID
3208// task Task with whole loop iteration range
3209// lb Pointer to loop lower bound
3210// ub Pointer to loop upper bound
3211// st Loop stride
3212// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3213// grainsize Schedule value if specified
3214// task_dup Tasks duplication routine
3215void
3216__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
3217 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3218 int sched, kmp_uint64 grainsize, void *task_dup )
3219{
Jonathan Peyton5a299da2016-06-13 16:56:41 +00003220 KMP_COUNT_BLOCK(OMP_TASKLOOP);
3221 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
Jonathan Peyton283a2152016-03-02 22:47:51 +00003222 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
3223 kmp_uint64 tc;
3224 kmp_uint64 lower = *lb; // compiler provides global bounds here
3225 kmp_uint64 upper = *ub;
Samuel Antao11e4c532016-03-12 00:55:17 +00003226 kmp_uint64 i, num_tasks = 0, extras = 0;
Jonathan Peyton283a2152016-03-02 22:47:51 +00003227 kmp_info_t *thread = __kmp_threads[gtid];
3228 kmp_taskdata_t *current_task = thread->th.th_current_task;
3229 kmp_task_t *next_task;
3230 kmp_int32 lastpriv = 0;
3231 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
3232 size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
3233
3234 // compute trip count
3235 if ( st == 1 ) { // most common case
3236 tc = upper - lower + 1;
3237 } else if ( st < 0 ) {
3238 tc = (lower - upper) / (-st) + 1;
3239 } else { // st > 0
3240 tc = (upper - lower) / st + 1;
3241 }
3242 if(tc == 0) {
Jonathan Peytond4f39772016-06-21 19:18:13 +00003243 KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003244 // free the pattern task and exit
3245 __kmp_task_start( gtid, task, current_task );
3246 // do not execute anything for zero-trip loop
3247 __kmp_task_finish( gtid, task, current_task );
3248 return;
3249 }
3250
3251 // compute num_tasks/grainsize based on the input provided
3252 switch( sched ) {
3253 case 0: // no schedule clause specified, we can choose the default
3254 // let's try to schedule (team_size*10) tasks
3255 grainsize = thread->th.th_team_nproc * 10;
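            // no break: fall through to case 2 and treat the computed value as num_tasks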
3256 case 2: // num_tasks provided
3257 if( grainsize > tc ) {
3258 num_tasks = tc; // too big num_tasks requested, adjust values
3259 grainsize = 1;
3260 extras = 0;
3261 } else {
3262 num_tasks = grainsize;
3263 grainsize = tc / num_tasks;
3264 extras = tc % num_tasks;
3265 }
3266 break;
3267 case 1: // grainsize provided
3268 if( grainsize > tc ) {
3269 num_tasks = 1; // too big grainsize requested, adjust values
3270 grainsize = tc;
3271 extras = 0;
3272 } else {
3273 num_tasks = tc / grainsize;
3274 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3275 extras = tc % num_tasks;
3276 }
3277 break;
3278 default:
3279 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3280 }
3281 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3282 KMP_DEBUG_ASSERT(num_tasks > extras);
3283 KMP_DEBUG_ASSERT(num_tasks > 0);
Jonathan Peytond4f39772016-06-21 19:18:13 +00003284 KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
3285 gtid, num_tasks, grainsize, extras));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003286
3287 // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3288 for( i = 0; i < num_tasks; ++i ) {
3289 kmp_uint64 chunk_minus_1;
3290 if( extras == 0 ) {
3291 chunk_minus_1 = grainsize - 1;
3292 } else {
3293 chunk_minus_1 = grainsize;
3294 --extras; // first extras iterations get bigger chunk (grainsize+1)
3295 }
3296 upper = lower + st * chunk_minus_1;
3297 if( i == num_tasks - 1 ) {
3298 // schedule the last task, set lastprivate flag
3299 lastpriv = 1;
3300#if KMP_DEBUG
3301 if( st == 1 )
3302 KMP_DEBUG_ASSERT(upper == *ub);
3303 else if( st > 0 )
3304 KMP_DEBUG_ASSERT(upper+st > *ub);
3305 else
3306 KMP_DEBUG_ASSERT(upper+st < *ub);
3307#endif
3308 }
3309 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3310 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3311 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3312 if( ptask_dup != NULL )
3313 ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
Jonathan Peytond4f39772016-06-21 19:18:13 +00003314 KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
3315 gtid, next_task, lower, upper, lower_offset, upper_offset));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003316 __kmp_omp_task(gtid, next_task, true); // schedule new task
3317 lower = upper + st; // adjust lower bound for the next iteration
3318 }
3319 // free the pattern task and exit
3320 __kmp_task_start( gtid, task, current_task );
3321 // do not execute the pattern task, just do bookkeeping
3322 __kmp_task_finish( gtid, task, current_task );
3323}
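// The chunking above maintains the invariant tc == num_tasks * grainsize +
// extras, with the first `extras` tasks receiving one extra iteration. For
// example, a taskloop with tc = 10 iterations and grainsize = 3 requested
// yields num_tasks = 10 / 3 = 3, a re-derived grainsize of 10 / 3 = 3 and
// extras = 10 % 3 = 1, so the chunks are 4, 3 and 3 iterations. A standalone
// sketch of the same split, kept out of the build with #if 0; the chunking
// type and function name are invented for this illustration.
#if 0
#include <cstdint>

struct chunking { uint64_t num_tasks, grainsize, extras; };

// Mirrors the "grainsize provided" branch of __kmp_taskloop_linear().
static chunking split_by_grainsize(uint64_t tc, uint64_t grainsize) {
    chunking c;
    if (grainsize > tc) {
        c.num_tasks = 1;   // requested grainsize larger than the trip count
        c.grainsize = tc;
        c.extras    = 0;
    } else {
        c.num_tasks = tc / grainsize;
        c.grainsize = tc / c.num_tasks;  // rebalance across num_tasks tasks
        c.extras    = tc % c.num_tasks;
    }
    // Invariant asserted by the runtime: tc == num_tasks * grainsize + extras
    return c;
}
#endif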
3324
3325/*!
3326@ingroup TASKING
3327@param loc Source location information
3328@param gtid Global thread ID
3329@param task Task structure
3330@param if_val Value of the if clause
3331@param lb Pointer to loop lower bound
3332@param ub Pointer to loop upper bound
3333@param st Loop stride
3334@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3335@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3336@param grainsize Schedule value if specified
3337@param task_dup Tasks duplication routine
3338
3339Execute the taskloop construct.
3340*/
3341void
3342__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3343 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3344 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3345{
3346 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3347 KMP_DEBUG_ASSERT( task != NULL );
3348
3349 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3350 gtid, taskdata, *lb, *ub, st, grainsize, sched));
3351
3352 // check if clause value first
3353 if( if_val == 0 ) { // if(0) specified, mark task as serial
3354 taskdata->td_flags.task_serial = 1;
3355 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3356 }
3357 if( nogroup == 0 ) {
3358 __kmpc_taskgroup( loc, gtid );
3359 }
3360
3361 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3362 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3363 }
3364
3365 if( nogroup == 0 ) {
3366 __kmpc_end_taskgroup( loc, gtid );
3367 }
3368 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3369}
3370
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003371#endif