blob: 4be322f2d45c0b3c5b2bcf6fb59cd382fb3d12e1 [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
Jonathan Peyton45be4502015-08-11 21:36:41 +000019#include "kmp_stats.h"
Jonathan Peyton30419822017-05-12 18:01:32 +000020#include "kmp_wait_release.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000021
Andrey Churbanove5f44922015-04-29 16:22:07 +000022#if OMPT_SUPPORT
23#include "ompt-specific.h"
24#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000025
Jonas Hahnfeld50fed042016-11-07 15:58:36 +000026#include "tsan_annotations.h"
27
Jim Cownie5e8470a2013-09-27 10:38:44 +000028/* forward declaration */
Jonathan Peyton30419822017-05-12 18:01:32 +000029static void __kmp_enable_tasking(kmp_task_team_t *task_team,
30 kmp_info_t *this_thr);
31static void __kmp_alloc_task_deque(kmp_info_t *thread,
32 kmp_thread_data_t *thread_data);
33static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
34 kmp_task_team_t *task_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +000035
Jonathan Peytondf6818b2016-06-14 17:57:47 +000036#ifdef OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +000037static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +000038#endif
39
Jim Cownie5e8470a2013-09-27 10:38:44 +000040#ifdef BUILD_TIED_TASK_STACK
41
Jim Cownie5e8470a2013-09-27 10:38:44 +000042// __kmp_trace_task_stack: print the tied tasks from the task stack in order
Jonathan Peyton30419822017-05-12 18:01:32 +000043// from top do bottom
Jim Cownie5e8470a2013-09-27 10:38:44 +000044//
45// gtid: global thread identifier for thread containing stack
46// thread_data: thread data for task team thread containing stack
47// threshold: value above which the trace statement triggers
48// location: string identifying call site of this function (for trace)
Jonathan Peyton30419822017-05-12 18:01:32 +000049static void __kmp_trace_task_stack(kmp_int32 gtid,
50 kmp_thread_data_t *thread_data,
51 int threshold, char *location) {
52 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
53 kmp_taskdata_t **stack_top = task_stack->ts_top;
54 kmp_int32 entries = task_stack->ts_entries;
55 kmp_taskdata_t *tied_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +000056
Jonathan Peyton30419822017-05-12 18:01:32 +000057 KA_TRACE(
58 threshold,
59 ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
60 "first_block = %p, stack_top = %p \n",
61 location, gtid, entries, task_stack->ts_first_block, stack_top));
Jim Cownie5e8470a2013-09-27 10:38:44 +000062
Jonathan Peyton30419822017-05-12 18:01:32 +000063 KMP_DEBUG_ASSERT(stack_top != NULL);
64 KMP_DEBUG_ASSERT(entries > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +000065
Jonathan Peyton30419822017-05-12 18:01:32 +000066 while (entries != 0) {
67 KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
68 // fix up ts_top if we need to pop from previous block
69 if (entries & TASK_STACK_INDEX_MASK == 0) {
70 kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
Jim Cownie5e8470a2013-09-27 10:38:44 +000071
Jonathan Peyton30419822017-05-12 18:01:32 +000072 stack_block = stack_block->sb_prev;
73 stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
Jim Cownie5e8470a2013-09-27 10:38:44 +000074 }
Jim Cownie5e8470a2013-09-27 10:38:44 +000075
Jonathan Peyton30419822017-05-12 18:01:32 +000076 // finish bookkeeping
77 stack_top--;
78 entries--;
79
80 tied_task = *stack_top;
81
82 KMP_DEBUG_ASSERT(tied_task != NULL);
83 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
84
85 KA_TRACE(threshold,
86 ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
87 "stack_top=%p, tied_task=%p\n",
88 location, gtid, entries, stack_top, tied_task));
89 }
90 KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);
91
92 KA_TRACE(threshold,
93 ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
94 location, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +000095}
96
Jim Cownie5e8470a2013-09-27 10:38:44 +000097// __kmp_init_task_stack: initialize the task stack for the first time
Jonathan Peyton30419822017-05-12 18:01:32 +000098// after a thread_data structure is created.
99// It should not be necessary to do this again (assuming the stack works).
Jim Cownie5e8470a2013-09-27 10:38:44 +0000100//
101// gtid: global thread identifier of calling thread
102// thread_data: thread data for task team thread containing stack
Jonathan Peyton30419822017-05-12 18:01:32 +0000103static void __kmp_init_task_stack(kmp_int32 gtid,
104 kmp_thread_data_t *thread_data) {
105 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
106 kmp_stack_block_t *first_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000107
Jonathan Peyton30419822017-05-12 18:01:32 +0000108 // set up the first block of the stack
109 first_block = &task_stack->ts_first_block;
110 task_stack->ts_top = (kmp_taskdata_t **)first_block;
111 memset((void *)first_block, '\0',
112 TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000113
Jonathan Peyton30419822017-05-12 18:01:32 +0000114 // initialize the stack to be empty
115 task_stack->ts_entries = TASK_STACK_EMPTY;
116 first_block->sb_next = NULL;
117 first_block->sb_prev = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000118}
119
Jim Cownie5e8470a2013-09-27 10:38:44 +0000120// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
121//
122// gtid: global thread identifier for calling thread
123// thread_data: thread info for thread containing stack
Jonathan Peyton30419822017-05-12 18:01:32 +0000124static void __kmp_free_task_stack(kmp_int32 gtid,
125 kmp_thread_data_t *thread_data) {
126 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
127 kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000128
Jonathan Peyton30419822017-05-12 18:01:32 +0000129 KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
130 // free from the second block of the stack
131 while (stack_block != NULL) {
132 kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000133
Jonathan Peyton30419822017-05-12 18:01:32 +0000134 stack_block->sb_next = NULL;
135 stack_block->sb_prev = NULL;
136 if (stack_block != &task_stack->ts_first_block) {
137 __kmp_thread_free(thread,
138 stack_block); // free the block, if not the first
Jim Cownie5e8470a2013-09-27 10:38:44 +0000139 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000140 stack_block = next_block;
141 }
142 // initialize the stack to be empty
143 task_stack->ts_entries = 0;
144 task_stack->ts_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000145}
146
Jim Cownie5e8470a2013-09-27 10:38:44 +0000147// __kmp_push_task_stack: Push the tied task onto the task stack.
148// Grow the stack if necessary by allocating another block.
149//
150// gtid: global thread identifier for calling thread
151// thread: thread info for thread containing stack
152// tied_task: the task to push on the stack
Jonathan Peyton30419822017-05-12 18:01:32 +0000153static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
154 kmp_taskdata_t *tied_task) {
155 // GEH - need to consider what to do if tt_threads_data not allocated yet
156 kmp_thread_data_t *thread_data =
157 &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
158 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000159
Jonathan Peyton30419822017-05-12 18:01:32 +0000160 if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
161 return; // Don't push anything on stack if team or team tasks are serialized
162 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000163
Jonathan Peyton30419822017-05-12 18:01:32 +0000164 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
165 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
166
167 KA_TRACE(20,
168 ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
169 gtid, thread, tied_task));
170 // Store entry
171 *(task_stack->ts_top) = tied_task;
172
173 // Do bookkeeping for next push
174 task_stack->ts_top++;
175 task_stack->ts_entries++;
176
177 if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
178 // Find beginning of this task block
179 kmp_stack_block_t *stack_block =
180 (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
181
182 // Check if we already have a block
183 if (stack_block->sb_next !=
184 NULL) { // reset ts_top to beginning of next block
185 task_stack->ts_top = &stack_block->sb_next->sb_block[0];
186 } else { // Alloc new block and link it up
187 kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
188 thread, sizeof(kmp_stack_block_t));
189
190 task_stack->ts_top = &new_block->sb_block[0];
191 stack_block->sb_next = new_block;
192 new_block->sb_prev = stack_block;
193 new_block->sb_next = NULL;
194
195 KA_TRACE(
196 30,
197 ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
198 gtid, tied_task, new_block));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000199 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000200 }
201 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
202 tied_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000203}
204
Jim Cownie5e8470a2013-09-27 10:38:44 +0000205// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
Jonathan Peyton30419822017-05-12 18:01:32 +0000206// the task, just check to make sure it matches the ending task passed in.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000207//
208// gtid: global thread identifier for the calling thread
209// thread: thread info structure containing stack
210// tied_task: the task popped off the stack
211// ending_task: the task that is ending (should match popped task)
Jonathan Peyton30419822017-05-12 18:01:32 +0000212static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
213 kmp_taskdata_t *ending_task) {
214 // GEH - need to consider what to do if tt_threads_data not allocated yet
215 kmp_thread_data_t *thread_data =
216 &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)];
217 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
218 kmp_taskdata_t *tied_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000219
Jonathan Peyton30419822017-05-12 18:01:32 +0000220 if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
221 // Don't pop anything from stack if team or team tasks are serialized
Jim Cownie5e8470a2013-09-27 10:38:44 +0000222 return;
Jonathan Peyton30419822017-05-12 18:01:32 +0000223 }
224
225 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
226 KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);
227
228 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
229 thread));
230
231 // fix up ts_top if we need to pop from previous block
232 if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
233 kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
234
235 stack_block = stack_block->sb_prev;
236 task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
237 }
238
239 // finish bookkeeping
240 task_stack->ts_top--;
241 task_stack->ts_entries--;
242
243 tied_task = *(task_stack->ts_top);
244
245 KMP_DEBUG_ASSERT(tied_task != NULL);
246 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
247 KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly
248
249 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
250 tied_task));
251 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000252}
253#endif /* BUILD_TIED_TASK_STACK */
254
Jim Cownie5e8470a2013-09-27 10:38:44 +0000255// __kmp_push_task: Add a task to the thread's deque
// gtid: global thread id of the encountering thread
// task: task thunk to enqueue on that thread's own deque
//
// Returns TASK_SUCCESSFULLY_PUSHED when the task is enqueued, or
// TASK_NOT_PUSHED when the caller must execute the task immediately instead
// (serialized team, or the deque is full).
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (taskdata->td_flags.task_serial) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (!KMP_TASKING_ENABLED(task_team)) {
    // First task pushed in this region: wake up the task team lazily.
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (thread_data->td.td_deque == NULL) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  // Check if deque is full
  // (unlocked fast-path check; re-verified under the lock below for OMP 4.5)
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Lock the deque for the task push operation
  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

#if OMP_45_ENABLED
  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }
#else
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
#endif

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  // Deque capacity is a power of two, so masking implements the wrap.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count

  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
346
Jonathan Peyton30419822017-05-12 18:01:32 +0000347// __kmp_pop_current_task_from_thread: set up current task from called thread
348// when team ends
349//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000350// this_thr: thread structure to set current_task in.
Jonathan Peyton30419822017-05-12 18:01:32 +0000351void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
352 KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
353 "this_thread=%p, curtask=%p, "
354 "curtask_parent=%p\n",
355 0, this_thr, this_thr->th.th_current_task,
356 this_thr->th.th_current_task->td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000357
Jonathan Peyton30419822017-05-12 18:01:32 +0000358 this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000359
Jonathan Peyton30419822017-05-12 18:01:32 +0000360 KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
361 "this_thread=%p, curtask=%p, "
362 "curtask_parent=%p\n",
363 0, this_thr, this_thr->th.th_current_task,
364 this_thr->th.th_current_task->td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000365}
366
Jonathan Peyton30419822017-05-12 18:01:32 +0000367// __kmp_push_current_task_to_thread: set up current task in called thread for a
368// new team
369//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000370// this_thr: thread structure to set up
371// team: team for implicit task data
372// tid: thread within team to set up
Jonathan Peyton30419822017-05-12 18:01:32 +0000373void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
374 int tid) {
375 // current task of the thread is a parent of the new just created implicit
376 // tasks of new team
377 KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
378 "curtask=%p "
379 "parent_task=%p\n",
380 tid, this_thr, this_thr->th.th_current_task,
381 team->t.t_implicit_task_taskdata[tid].td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000382
Jonathan Peyton30419822017-05-12 18:01:32 +0000383 KMP_DEBUG_ASSERT(this_thr != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000384
Jonathan Peyton30419822017-05-12 18:01:32 +0000385 if (tid == 0) {
386 if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
387 team->t.t_implicit_task_taskdata[0].td_parent =
388 this_thr->th.th_current_task;
389 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000390 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000391 } else {
392 team->t.t_implicit_task_taskdata[tid].td_parent =
393 team->t.t_implicit_task_taskdata[0].td_parent;
394 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
395 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000396
Jonathan Peyton30419822017-05-12 18:01:32 +0000397 KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
398 "curtask=%p "
399 "parent_task=%p\n",
400 tid, this_thr, this_thr->th.th_current_task,
401 team->t.t_implicit_task_taskdata[tid].td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000402}
403
Jim Cownie5e8470a2013-09-27 10:38:44 +0000404// __kmp_task_start: bookkeeping for a task starting execution
Jonathan Peyton30419822017-05-12 18:01:32 +0000405//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000406// GTID: global thread id of calling thread
407// task: task starting execution
408// current_task: task suspending
// gtid: global thread id of calling thread
// task: explicit task that begins executing on this thread
// current_task: the task being suspended in favor of 'task'
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  // TODO: GEH - make sure root team implicit task is initialized properly.
  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;

  // A tied task starts/executes at most once; an untied task may be
  // re-entered here on a later continuation, hence the TASK_UNTIED escapes.
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  // GEH TODO: shouldn't we pass some sort of location identifier here?
  // APT: yes, we will pass location here.
  // need to store current thread state (in a thread or taskdata structure)
  // before setting work_state, otherwise wrong state is set after end of task

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));

#if OMPT_SUPPORT
  // Notify the tool that the task began; the parent's task id/frame are
  // passed when a parent exists.
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
    kmp_taskdata_t *parent = taskdata->td_parent;
    ompt_callbacks.ompt_callback(ompt_event_task_begin)(
        parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
        parent ? &(parent->ompt_task_info.frame) : NULL,
        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function);
  }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
  /* OMPT emit all dependences if requested by the tool */
  if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
      ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
    ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps,
        taskdata->ompt_task_info.ndeps);
    /* We can now free the allocated memory for the dependencies */
    KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps);
    taskdata->ompt_task_info.deps = NULL;
    taskdata->ompt_task_info.ndeps = 0;
  }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

  return;
}
476
Jonathan Peyton30419822017-05-12 18:01:32 +0000477// __kmpc_omp_task_begin_if0: report that a given serialized task has started
478// execution
479//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000480// loc_ref: source location information; points to beginning of task block.
481// gtid: global thread number.
482// task: task thunk for the started task.
Jonathan Peyton30419822017-05-12 18:01:32 +0000483void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
484 kmp_task_t *task) {
485 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
486 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000487
Jonathan Peyton30419822017-05-12 18:01:32 +0000488 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
489 "current_task=%p\n",
490 gtid, loc_ref, taskdata, current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000491
Jonathan Peyton30419822017-05-12 18:01:32 +0000492 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
493 // untied task needs to increment counter so that the task structure is not
494 // freed prematurely
495 kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
496 KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
497 "incremented for task %p\n",
498 gtid, counter, taskdata));
499 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000500
Jonathan Peyton30419822017-05-12 18:01:32 +0000501 taskdata->td_flags.task_serial =
502 1; // Execute this task immediately, not deferred.
503 __kmp_task_start(gtid, task, current_task);
Jonathan Peytone6643da2016-04-18 21:35:14 +0000504
Jonathan Peyton30419822017-05-12 18:01:32 +0000505 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
506 loc_ref, taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000507
Jonathan Peyton30419822017-05-12 18:01:32 +0000508 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000509}
510
511#ifdef TASK_UNUSED
Jim Cownie5e8470a2013-09-27 10:38:44 +0000512// __kmpc_omp_task_begin: report that a given task has started execution
513// NEVER GENERATED BY COMPILER, DEPRECATED!!!
Jonathan Peyton30419822017-05-12 18:01:32 +0000514void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
515 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000516
Jonathan Peyton30419822017-05-12 18:01:32 +0000517 KA_TRACE(
518 10,
519 ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
520 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000521
Jonathan Peyton30419822017-05-12 18:01:32 +0000522 __kmp_task_start(gtid, task, current_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000523
Jonathan Peyton30419822017-05-12 18:01:32 +0000524 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
525 loc_ref, KMP_TASK_TO_TASKDATA(task)));
526 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000527}
528#endif // TASK_UNUSED
529
Jim Cownie5e8470a2013-09-27 10:38:44 +0000530// __kmp_free_task: free the current task space and the space for shareds
Jim Cownie5e8470a2013-09-27 10:38:44 +0000531//
532// gtid: Global thread ID of calling thread
533// taskdata: task to free
534// thread: thread data structure of caller
// gtid: Global thread ID of calling thread (trace output only)
// taskdata: completed task whose storage is released
// thread: thread whose allocator owns the taskdata block
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  // (the task must be a fully completed, never-freed explicit task with no
  // remaining children before its storage can be released).
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_allocated_child_tasks) == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_incomplete_child_tasks) == 0);

  // Mark freed first so a stale pointer trips the assert above, not a UAF.
  taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);
// deallocate the taskdata and shared variable blocks associated with this task
// (the shareds block is carved out of the same allocation as the taskdata)
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
560
Jonathan Peyton30419822017-05-12 18:01:32 +0000561// __kmp_free_task_and_ancestors: free the current task and ancestors without
562// children
563//
564// gtid: Global thread ID of calling thread
565// taskdata: task to free
566// thread: thread data structure of caller
567static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
568 kmp_taskdata_t *taskdata,
569 kmp_info_t *thread) {
570#if OMP_45_ENABLED
571 // Proxy tasks must always be allowed to free their parents
572 // because they can be run in background even in serial mode.
573 kmp_int32 team_serial =
574 (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
575 !taskdata->td_flags.proxy;
576#else
577 kmp_int32 team_serial =
578 taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
579#endif
580 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
581
582 kmp_int32 children =
583 KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_allocated_child_tasks)) -
584 1;
585 KMP_DEBUG_ASSERT(children >= 0);
586
587 // Now, go up the ancestor tree to see if any ancestors can now be freed.
588 while (children == 0) {
589 kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
590
591 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
592 "and freeing itself\n",
593 gtid, taskdata));
594
595 // --- Deallocate my ancestor task ---
596 __kmp_free_task(gtid, taskdata, thread);
597
598 taskdata = parent_taskdata;
599
600 // Stop checking ancestors at implicit task instead of walking up ancestor
601 // tree to avoid premature deallocation of ancestors.
602 if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT)
603 return;
604
605 // Predecrement simulated by "- 1" calculation
606 children = KMP_TEST_THEN_DEC32(
607 (kmp_int32 *)(&taskdata->td_allocated_child_tasks)) -
608 1;
609 KMP_DEBUG_ASSERT(children >= 0);
610 }
611
612 KA_TRACE(
613 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
614 "not freeing it yet\n",
615 gtid, taskdata, children));
616}
617
Jim Cownie5e8470a2013-09-27 10:38:44 +0000618// __kmp_task_finish: bookkeeping to do when a task finishes execution
Jonathan Peyton30419822017-05-12 18:01:32 +0000619//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000620// gtid: global thread ID for calling thread
621// task: task to be finished
622// resumed_task: task to be resumed. (may be NULL if task is serialized)
Jonathan Peyton30419822017-05-12 18:01:32 +0000623static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
624 kmp_taskdata_t *resumed_task) {
625 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
626 kmp_info_t *thread = __kmp_threads[gtid];
627 kmp_task_team_t *task_team =
628 thread->th.th_task_team; // might be NULL for serial teams...
629 kmp_int32 children = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000630
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000631#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +0000632 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) {
633 kmp_taskdata_t *parent = taskdata->td_parent;
634 ompt_callbacks.ompt_callback(ompt_event_task_end)(
635 taskdata->ompt_task_info.task_id);
636 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000637#endif
638
Jonathan Peyton30419822017-05-12 18:01:32 +0000639 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
640 "task %p\n",
641 gtid, taskdata, resumed_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000642
Jonathan Peyton30419822017-05-12 18:01:32 +0000643 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000644
Jonathan Peyton30419822017-05-12 18:01:32 +0000645// Pop task from stack if tied
Jim Cownie5e8470a2013-09-27 10:38:44 +0000646#ifdef BUILD_TIED_TASK_STACK
Jonathan Peyton30419822017-05-12 18:01:32 +0000647 if (taskdata->td_flags.tiedness == TASK_TIED) {
648 __kmp_pop_task_stack(gtid, thread, taskdata);
649 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000650#endif /* BUILD_TIED_TASK_STACK */
651
Jonathan Peyton30419822017-05-12 18:01:32 +0000652 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
653 // untied task needs to check the counter so that the task structure is not
654 // freed prematurely
655 kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
656 KA_TRACE(
657 20,
658 ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
659 gtid, counter, taskdata));
660 if (counter > 0) {
661 // untied task is not done, to be continued possibly by other thread, do
662 // not free it now
663 if (resumed_task == NULL) {
664 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
665 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
666 // task is the parent
667 }
668 thread->th.th_current_task = resumed_task; // restore current_task
669 resumed_task->td_flags.executing = 1; // resume previous task
670 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
671 "resuming task %p\n",
672 gtid, taskdata, resumed_task));
673 return;
Jonathan Peytone6643da2016-04-18 21:35:14 +0000674 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000675 }
Jonathan Peytone6643da2016-04-18 21:35:14 +0000676
Jonathan Peyton30419822017-05-12 18:01:32 +0000677 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
678 taskdata->td_flags.complete = 1; // mark the task as completed
679 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
680 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000681
Jonathan Peyton30419822017-05-12 18:01:32 +0000682 // Only need to keep track of count if team parallel and tasking not
683 // serialized
684 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
685 // Predecrement simulated by "- 1" calculation
686 children =
687 KMP_TEST_THEN_DEC32(
688 (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) -
689 1;
690 KMP_DEBUG_ASSERT(children >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000691#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000692 if (taskdata->td_taskgroup)
693 KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000694#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000695 }
696 // if we found proxy tasks there could exist a dependency chain
697 // with the proxy task as origin
698 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
699 (task_team && task_team->tt.tt_found_proxy_tasks)) {
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000700#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000701 __kmp_release_deps(gtid, taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000702#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000703 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000704
Jonathan Peyton30419822017-05-12 18:01:32 +0000705 // td_flags.executing must be marked as 0 after __kmp_release_deps has been
706 // called. Othertwise, if a task is executed immediately from the release_deps
707 // code, the flag will be reset to 1 again by this same function
708 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
709 taskdata->td_flags.executing = 0; // suspend the finishing task
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000710
Jonathan Peyton30419822017-05-12 18:01:32 +0000711 KA_TRACE(
712 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
713 gtid, taskdata, children));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000714
Jim Cownie181b4bb2013-12-23 17:28:57 +0000715#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000716 /* If the tasks' destructor thunk flag has been set, we need to invoke the
717 destructor thunk that has been generated by the compiler. The code is
718 placed here, since at this point other tasks might have been released
719 hence overlapping the destructor invokations with some other work in the
720 released tasks. The OpenMP spec is not specific on when the destructors
721 are invoked, so we should be free to choose. */
722 if (taskdata->td_flags.destructors_thunk) {
723 kmp_routine_entry_t destr_thunk = task->data1.destructors;
724 KMP_ASSERT(destr_thunk);
725 destr_thunk(gtid, task);
726 }
Jim Cownie181b4bb2013-12-23 17:28:57 +0000727#endif // OMP_40_ENABLED
728
Jonathan Peyton30419822017-05-12 18:01:32 +0000729 // bookkeeping for resuming task:
730 // GEH - note tasking_ser => task_serial
731 KMP_DEBUG_ASSERT(
732 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
733 taskdata->td_flags.task_serial);
734 if (taskdata->td_flags.task_serial) {
735 if (resumed_task == NULL) {
736 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
737 // task is the parent
738 } else
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000739#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000740 if (!(task_team && task_team->tt.tt_found_proxy_tasks))
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000741#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000742 {
743 // verify resumed task passed in points to parent
744 KMP_DEBUG_ASSERT(resumed_task == taskdata->td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000745 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000746 } else {
747 KMP_DEBUG_ASSERT(resumed_task !=
748 NULL); // verify that resumed task is passed as arguemnt
749 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000750
Jonathan Peyton30419822017-05-12 18:01:32 +0000751 // Free this task and then ancestor tasks if they have no children.
752 // Restore th_current_task first as suggested by John:
753 // johnmc: if an asynchronous inquiry peers into the runtime system
754 // it doesn't see the freed task as the current task.
755 thread->th.th_current_task = resumed_task;
756 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000757
Jonathan Peyton30419822017-05-12 18:01:32 +0000758 // TODO: GEH - make sure root team implicit task is initialized properly.
759 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
760 resumed_task->td_flags.executing = 1; // resume previous task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000761
Jonathan Peyton30419822017-05-12 18:01:32 +0000762 KA_TRACE(
763 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
764 gtid, taskdata, resumed_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000765
Jonathan Peyton30419822017-05-12 18:01:32 +0000766 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000767}
768
Jim Cownie5e8470a2013-09-27 10:38:44 +0000769// __kmpc_omp_task_complete_if0: report that a task has completed execution
Jonathan Peyton30419822017-05-12 18:01:32 +0000770//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000771// loc_ref: source location information; points to end of task block.
772// gtid: global thread number.
773// task: task thunk for the completed task.
Jonathan Peyton30419822017-05-12 18:01:32 +0000774void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
775 kmp_task_t *task) {
776 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
777 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
778 // this routine will provide task to resume
779 __kmp_task_finish(gtid, task, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000780
Jonathan Peyton30419822017-05-12 18:01:32 +0000781 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
782 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
783 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000784}
785
#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, taskdata));

  __kmp_task_finish(gtid, task, NULL); // Not sure how to find task to resume

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, taskdata));
}
#endif // TASK_UNUSED
801
#if OMPT_SUPPORT
// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will
// only be called after ompt_tool, so we already know whether ompt is enabled
// or not.
static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid,
                                        void *function) {
  if (!ompt_enabled)
    return; // tool support disabled; nothing to record
  task->ompt_task_info.task_id = __ompt_task_id_new(tid);
  task->ompt_task_info.function = function;
  task->ompt_task_info.frame.reenter_runtime_frame = NULL;
  task->ompt_task_info.frame.exit_runtime_frame = NULL;
#if OMP_40_ENABLED
  task->ompt_task_info.deps = NULL;
  task->ompt_task_info.ndeps = 0;
#endif /* OMP_40_ENABLED */
}
#endif
820
Jonathan Peyton30419822017-05-12 18:01:32 +0000821// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
822// task for a given thread
Jim Cownie5e8470a2013-09-27 10:38:44 +0000823//
824// loc_ref: reference to source location of parallel region
825// this_thr: thread data structure corresponding to implicit task
826// team: team for this_thr
827// tid: thread id of given thread within team
828// set_curr_task: TRUE if need to push current task to thread
Jonathan Peyton30419822017-05-12 18:01:32 +0000829// NOTE: Routine does not set up the implicit task ICVS. This is assumed to
830// have already been done elsewhere.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000831// TODO: Get better loc_ref. Value passed in may be NULL
Jonathan Peyton30419822017-05-12 18:01:32 +0000832void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
833 kmp_team_t *team, int tid, int set_curr_task) {
834 kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000835
Jonathan Peyton30419822017-05-12 18:01:32 +0000836 KF_TRACE(
837 10,
838 ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
839 tid, team, task, set_curr_task ? "TRUE" : "FALSE"));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000840
Jonathan Peyton30419822017-05-12 18:01:32 +0000841 task->td_task_id = KMP_GEN_TASK_ID();
842 task->td_team = team;
843 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info
844 // in debugger)
845 task->td_ident = loc_ref;
846 task->td_taskwait_ident = NULL;
847 task->td_taskwait_counter = 0;
848 task->td_taskwait_thread = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000849
Jonathan Peyton30419822017-05-12 18:01:32 +0000850 task->td_flags.tiedness = TASK_TIED;
851 task->td_flags.tasktype = TASK_IMPLICIT;
Jonathan Peytondf6818b2016-06-14 17:57:47 +0000852#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000853 task->td_flags.proxy = TASK_FULL;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +0000854#endif
855
Jonathan Peyton30419822017-05-12 18:01:32 +0000856 // All implicit tasks are executed immediately, not deferred
857 task->td_flags.task_serial = 1;
858 task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
859 task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000860
Jonathan Peyton30419822017-05-12 18:01:32 +0000861 task->td_flags.started = 1;
862 task->td_flags.executing = 1;
863 task->td_flags.complete = 0;
864 task->td_flags.freed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000865
Jim Cownie181b4bb2013-12-23 17:28:57 +0000866#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000867 task->td_depnode = NULL;
Jim Cownie181b4bb2013-12-23 17:28:57 +0000868#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000869
Jonathan Peyton30419822017-05-12 18:01:32 +0000870 if (set_curr_task) { // only do this init first time thread is created
871 task->td_incomplete_child_tasks = 0;
872 task->td_allocated_child_tasks = 0; // Not used: don't need to
873// deallocate implicit task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000874#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000875 task->td_taskgroup = NULL; // An implicit task does not have taskgroup
876 task->td_dephash = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000877#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000878 __kmp_push_current_task_to_thread(this_thr, team, tid);
879 } else {
880 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
881 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
882 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000883
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000884#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +0000885 __kmp_task_init_ompt(task, tid, NULL);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000886#endif
887
Jonathan Peyton30419822017-05-12 18:01:32 +0000888 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
889 team, task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000890}
891
Jonathan Peyton30419822017-05-12 18:01:32 +0000892// __kmp_finish_implicit_task: Release resources associated to implicit tasks
893// at the end of parallel regions. Some resources are kept for reuse in the next
894// parallel region.
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000895//
Jonathan Peyton30419822017-05-12 18:01:32 +0000896// thread: thread data structure corresponding to implicit task
897void __kmp_finish_implicit_task(kmp_info_t *thread) {
898 kmp_taskdata_t *task = thread->th.th_current_task;
899 if (task->td_dephash)
900 __kmp_dephash_free_entries(thread, task->td_dephash);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000901}
902
Jonathan Peyton30419822017-05-12 18:01:32 +0000903// __kmp_free_implicit_task: Release resources associated to implicit tasks
904// when these are destroyed regions
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000905//
Jonathan Peyton30419822017-05-12 18:01:32 +0000906// thread: thread data structure corresponding to implicit task
907void __kmp_free_implicit_task(kmp_info_t *thread) {
908 kmp_taskdata_t *task = thread->th.th_current_task;
909 if (task->td_dephash)
910 __kmp_dephash_free(thread, task->td_dephash);
911 task->td_dephash = NULL;
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000912}
913
Jonathan Peyton30419822017-05-12 18:01:32 +0000914// Round up a size to a power of two specified by val: Used to insert padding
915// between structures co-allocated using a single malloc() call
916static size_t __kmp_round_up_to_val(size_t size, size_t val) {
917 if (size & (val - 1)) {
918 size &= ~(val - 1);
919 if (size <= KMP_SIZE_T_MAX - val) {
920 size += val; // Round up if there is no overflow.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000921 }; // if
Jonathan Peyton30419822017-05-12 18:01:32 +0000922 }; // if
923 return size;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000924} // __kmp_round_up_to_va
925
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new''
// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including
// private vars accessed in task.
// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed
// in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).
//
// Memory layout of the single allocation made below:
//   [ kmp_taskdata_t | kmp_task_t + privates | pad | shareds ]
// so taskdata and task are freed together with one deallocation.
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  // A task nested inside a final task inherits the final flag.
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
      // NOTE(review): intentionally empty — no extra handling is currently
      // done for a merged if(0) task under a final parent.
    }
    flags->final = 1;
  }

#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY) {
    // Proxy tasks are forced untied and merged-if0.
    flags->tiedness = TASK_UNTIED;
    flags->merged_if0 = 1;

    /* are we running in a sequential parallel or tskm_immediate_exec... we need
       tasking support enabled */
    if ((thread->th.th_task_team) == NULL) {
      /* This should only happen if the team is serialized
         setup a task team and propagate it to the thread */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(
          thread, team,
          1); // 1 indicates setup the current team regardless of nthreads
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    /* tasking must be enabled now as the task might not be pushed */
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only owner can allocate
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    // Record that the team has seen at least one proxy task.
    if (task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
  }
#endif

  // Calculate shared structure offset including padding after kmp_task_t struct
  // to align pointers in shared struct
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

// Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */
  ANNOTATE_HAPPENS_AFTER(taskdata);

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0; // AC: Always start with 0 part id

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  taskdata->td_untied_count = 0;
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
#if OMP_45_ENABLED
  // avoid copying icvs for proxy tasks
  if (flags->proxy == TASK_FULL)
#endif
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags.tiedness = flags->tiedness;
  taskdata->td_flags.final = flags->final;
  taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
  taskdata->td_flags.proxy = flags->proxy;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
#endif
  taskdata->td_flags.tasktype = TASK_EXPLICIT;

  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  // GEH - TODO: fix this to copy parent task's value of team_serial flag
  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // GEH - Note we serialize the task if the team is serialized to make sure
  // implicit parallel region tasks are not left until program termination to
  // execute. Also, it helps locality to execute immediately.

  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;

  taskdata->td_flags.native = flags->native;

  taskdata->td_incomplete_child_tasks = 0;
  taskdata->td_allocated_child_tasks = 1; // start at one because counts current
// task and children
#if OMP_40_ENABLED
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
#endif

// Only need to keep track of child task counts if team parallel and tasking not
// serialized or if it is a proxy task
#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY ||
      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#else
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#endif
  {
    KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks));
#if OMP_40_ENABLED
    if (parent_task->td_taskgroup)
      KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
#endif
    // Only need to keep track of allocated child tasks for explicit tasks since
    // implicit not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_TEST_THEN_INC32(
          (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks));
    }
  }

  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));
  ANNOTATE_HAPPENS_BEFORE(task);

#if OMPT_SUPPORT
  __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry);
#endif

  return task;
}
1136
Jonathan Peyton30419822017-05-12 18:01:32 +00001137kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1138 kmp_int32 flags, size_t sizeof_kmp_task_t,
1139 size_t sizeof_shareds,
1140 kmp_routine_entry_t task_entry) {
1141 kmp_task_t *retval;
1142 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001143
Jonathan Peyton30419822017-05-12 18:01:32 +00001144 input_flags->native = FALSE;
1145// __kmp_task_alloc() sets up all other runtime flags
Jim Cownie5e8470a2013-09-27 10:38:44 +00001146
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001147#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001148 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1149 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1150 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1151 input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
1152 sizeof_shareds, task_entry));
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001153#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001154 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1155 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1156 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1157 sizeof_kmp_task_t, sizeof_shareds, task_entry));
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001158#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001159
Jonathan Peyton30419822017-05-12 18:01:32 +00001160 retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1161 sizeof_shareds, task_entry);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001162
Jonathan Peyton30419822017-05-12 18:01:32 +00001163 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001164
Jonathan Peyton30419822017-05-12 18:01:32 +00001165 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001166}
1167
Jim Cownie5e8470a2013-09-27 10:38:44 +00001168// __kmp_invoke_task: invoke the specified task
1169//
1170// gtid: global thread ID of caller
1171// task: the task to invoke
1172// current_task: the task to resume after task invokation
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  // cur_time is only written and read under the same
  // (__kmp_forkjoin_frames_mode == 3) guard below, so it is never read
  // uninitialized.
  kmp_uint64 cur_time;
#if OMP_40_ENABLED
  // set when cancellation of the taskgroup/parallel region makes this task's
  // body a no-op
  int discard = 0 /* false */;
#endif
  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
#if OMP_45_ENABLED
  if (taskdata->td_flags.proxy == TASK_PROXY &&
      taskdata->td_flags.complete == 1) {
    // This is a proxy task that was already completed but it needs to run
    // its bottom-half finish
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));

    return;
  }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__kmp_forkjoin_frames_mode == 3) {
    // Get the current time stamp to measure task execution time to correct
    // barrier imbalance time
    cur_time = __itt_get_timestamp();
  }
#endif

#if OMP_45_ENABLED
  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
    ANNOTATE_HAPPENS_AFTER(task);
    __kmp_task_start(gtid, task, current_task);
#if OMP_45_ENABLED
  }
#endif

#if OMPT_SUPPORT
  ompt_thread_info_t oldInfo;
  kmp_info_t *thread;
  if (ompt_enabled) {
    // Store the threads states and restore them after the task
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = ompt_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_runtime_frame =
        __builtin_frame_address(0);
  }
#endif

#if OMP_40_ENABLED
  // TODO: cancel tasks if the parallel region has also been cancelled
  // TODO: check if this sequence can be hoisted above __kmp_task_start
  // if cancellation has been enabled for this run ...
  if (__kmp_omp_cancellation) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *this_team = this_thr->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
      KMP_COUNT_BLOCK(TASK_cancelled);
      // this task belongs to a task group and we need to cancel it
      discard = 1 /* true */;
    }
  }

  // Invoke the task routine and pass in relevant data.
  // Thunks generated by gcc take a different argument list.
  // NOTE: when OMP_40_ENABLED is off, the `if (!discard)` guard (and its
  // closing brace further down) compile away, so the routine always runs.
  if (!discard) {
#if KMP_STATS_ENABLED
    // Attribute the task's execution time to the context (barrier, taskwait,
    // ...) in which it was picked up.
    KMP_COUNT_BLOCK(TASK_executed);
    switch (KMP_GET_THREAD_STATE()) {
    case FORK_JOIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
      break;
    case PLAIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
      break;
    case TASKYIELD:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
      break;
    case TASKWAIT:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
      break;
    case TASKGROUP:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
      break;
    default:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
      break;
    }
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
    /* let OMPT know that we're about to run this task */
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
          current_task->ompt_task_info.task_id,
          taskdata->ompt_task_info.task_id);
    }
#endif

#ifdef KMP_GOMP_COMPAT
    if (taskdata->td_flags.native) {
      // GOMP-style thunk: single void* argument (the shareds block)
      ((void (*)(void *))(*(task->routine)))(task->shareds);
    } else
#endif /* KMP_GOMP_COMPAT */
    {
      // Intel/clang-style thunk: (gtid, task) argument list
      (*(task->routine))(gtid, task);
    }
    KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
    /* let OMPT know that we're returning to the callee task */
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
          taskdata->ompt_task_info.task_id,
          current_task->ompt_task_info.task_id);
    }
#endif

#if OMP_40_ENABLED
  }
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT
  if (ompt_enabled) {
    thread->th.ompt_thread_info = oldInfo;
    taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
  }
#endif

#if OMP_45_ENABLED
  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
    ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
    __kmp_task_finish(gtid, task, current_task);
#if OMP_45_ENABLED
  }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - correct arrive time after the task finished
  if (__kmp_forkjoin_frames_mode == 3) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    if (this_thr->th.th_bar_arrive_time) {
      this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
  }
#endif
  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
  return;
}
1344
Jim Cownie5e8470a2013-09-27 10:38:44 +00001345// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1346//
1347// loc_ref: location of original task pragma (ignored)
1348// gtid: Global Thread ID of encountering thread
1349// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1350// Returns:
Jonathan Peyton30419822017-05-12 18:01:32 +00001351// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1352// be resumed later.
1353// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1354// resumed later.
1355kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
1356 kmp_task_t *new_task) {
1357 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001358
Jonathan Peyton30419822017-05-12 18:01:32 +00001359 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1360 loc_ref, new_taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001361
Jonathan Peyton30419822017-05-12 18:01:32 +00001362 /* Should we execute the new task or queue it? For now, let's just always try
1363 to queue it. If the queue fills up, then we'll execute it. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001364
Jonathan Peyton30419822017-05-12 18:01:32 +00001365 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
1366 { // Execute this task immediately
1367 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1368 new_taskdata->td_flags.task_serial = 1;
1369 __kmp_invoke_task(gtid, new_task, current_task);
1370 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001371
Jonathan Peyton30419822017-05-12 18:01:32 +00001372 KA_TRACE(
1373 10,
1374 ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1375 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1376 gtid, loc_ref, new_taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001377
Jonathan Peyton30419822017-05-12 18:01:32 +00001378 ANNOTATE_HAPPENS_BEFORE(new_task);
1379 return TASK_CURRENT_NOT_QUEUED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001380}
1381
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001382// __kmp_omp_task: Schedule a non-thread-switchable task for execution
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001383//
Jonathan Peyton30419822017-05-12 18:01:32 +00001384// gtid: Global Thread ID of encountering thread
1385// new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1386// serialize_immediate: if TRUE then if the task is executed immediately its
1387// execution will be serialized
1388// Returns:
1389// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1390// be resumed later.
1391// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1392// resumed later.
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPT_SUPPORT
  if (ompt_enabled) {
    // Record the caller's frame so OMPT tools can unwind past the runtime.
    new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
        __builtin_frame_address(1);
  }
#endif

/* Should we execute the new task or queue it? For now, let's just always try to
   queue it. If the queue fills up, then we'll execute it. */
#if OMP_45_ENABLED
  // Proxy tasks are never pushed onto a deque here; they fall straight
  // through to the immediate-execution branch.
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#else
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#endif
  { // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    // serialize_immediate distinguishes "task" (serialize) from callers that
    // must not serialize, e.g. taskloop-style expansion.
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

#if OMPT_SUPPORT
  if (ompt_enabled) {
    new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
  }
#endif

  ANNOTATE_HAPPENS_BEFORE(new_task);
  return TASK_CURRENT_NOT_QUEUED;
}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001428
Jonathan Peyton30419822017-05-12 18:01:32 +00001429// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
1430// non-thread-switchable task from the parent thread only!
1431//
Jim Cownie5e8470a2013-09-27 10:38:44 +00001432// loc_ref: location of original task pragma (ignored)
1433// gtid: Global Thread ID of encountering thread
Jonathan Peyton30419822017-05-12 18:01:32 +00001434// new_task: non-thread-switchable task thunk allocated by
1435// __kmp_omp_task_alloc()
1436// Returns:
1437// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1438// be resumed later.
1439// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1440// resumed later.
1441kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
1442 kmp_task_t *new_task) {
1443 kmp_int32 res;
1444 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001445
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001446#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001447 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001448#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001449 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1450 new_taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001451
Jonathan Peyton30419822017-05-12 18:01:32 +00001452 res = __kmp_omp_task(gtid, new_task, true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001453
Jonathan Peyton30419822017-05-12 18:01:32 +00001454 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
1455 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1456 gtid, loc_ref, new_taskdata));
1457 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001458}
1459
Jonathan Peyton30419822017-05-12 18:01:32 +00001460// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
1461// complete
1462kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
1463 kmp_taskdata_t *taskdata;
1464 kmp_info_t *thread;
1465 int thread_finished = FALSE;
1466 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001467
Jonathan Peyton30419822017-05-12 18:01:32 +00001468 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001469
Jonathan Peyton30419822017-05-12 18:01:32 +00001470 if (__kmp_tasking_mode != tskm_immediate_exec) {
1471 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
1472 // begin wait?
Jim Cownie5e8470a2013-09-27 10:38:44 +00001473
Jonathan Peyton30419822017-05-12 18:01:32 +00001474 thread = __kmp_threads[gtid];
1475 taskdata = thread->th.th_current_task;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001476
1477#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001478 ompt_task_id_t my_task_id;
1479 ompt_parallel_id_t my_parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001480
Jonathan Peyton30419822017-05-12 18:01:32 +00001481 if (ompt_enabled) {
1482 kmp_team_t *team = thread->th.th_team;
1483 my_task_id = taskdata->ompt_task_info.task_id;
1484 my_parallel_id = team->t.ompt_team_info.parallel_id;
Jonathan Peyton61118492016-05-20 19:03:38 +00001485
Jonathan Peyton30419822017-05-12 18:01:32 +00001486 taskdata->ompt_task_info.frame.reenter_runtime_frame =
1487 __builtin_frame_address(1);
1488 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1489 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id,
1490 my_task_id);
1491 }
1492 }
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001493#endif
1494
Jonathan Peyton30419822017-05-12 18:01:32 +00001495// Debugger: The taskwait is active. Store location and thread encountered the
1496// taskwait.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001497#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001498// Note: These values are used by ITT events as well.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001499#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001500 taskdata->td_taskwait_counter += 1;
1501 taskdata->td_taskwait_ident = loc_ref;
1502 taskdata->td_taskwait_thread = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001503
1504#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001505 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1506 if (itt_sync_obj != NULL)
1507 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001508#endif /* USE_ITT_BUILD */
1509
Jonathan Peyton30419822017-05-12 18:01:32 +00001510 bool must_wait =
1511 !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
Andrey Churbanovdd313b02016-11-01 08:33:36 +00001512
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001513#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001514 must_wait = must_wait || (thread->th.th_task_team != NULL &&
1515 thread->th.th_task_team->tt.tt_found_proxy_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001516#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001517 if (must_wait) {
1518 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1519 while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) {
1520 flag.execute_tasks(thread, gtid, FALSE,
1521 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1522 __kmp_task_stealing_constraint);
1523 }
1524 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001525#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001526 if (itt_sync_obj != NULL)
1527 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001528#endif /* USE_ITT_BUILD */
1529
Jonathan Peyton30419822017-05-12 18:01:32 +00001530 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
1531 // end of wait?
1532 // Debugger: The taskwait is completed. Location remains, but thread is
1533 // negated.
1534 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
Jonathan Peyton960ea2f2015-11-09 15:57:04 +00001535
1536#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001537 if (ompt_enabled) {
1538 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1539 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id,
1540 my_task_id);
1541 }
1542 taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001543 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001544#endif
1545 ANNOTATE_HAPPENS_AFTER(taskdata);
1546 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001547
Jonathan Peyton30419822017-05-12 18:01:32 +00001548 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1549 "returning TASK_CURRENT_NOT_QUEUED\n",
1550 gtid, taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001551
Jonathan Peyton30419822017-05-12 18:01:32 +00001552 return TASK_CURRENT_NOT_QUEUED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001553}
1554
Jim Cownie5e8470a2013-09-27 10:38:44 +00001555// __kmpc_omp_taskyield: switch to a different task
Jonathan Peyton30419822017-05-12 18:01:32 +00001556kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
1557 kmp_taskdata_t *taskdata;
1558 kmp_info_t *thread;
1559 int thread_finished = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001560
Jonathan Peyton30419822017-05-12 18:01:32 +00001561 KMP_COUNT_BLOCK(OMP_TASKYIELD);
1562 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001563
Jonathan Peyton30419822017-05-12 18:01:32 +00001564 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1565 gtid, loc_ref, end_part));
Jonathan Peyton45be4502015-08-11 21:36:41 +00001566
Jonathan Peyton30419822017-05-12 18:01:32 +00001567 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
1568 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
1569 // begin wait?
Jim Cownie5e8470a2013-09-27 10:38:44 +00001570
Jonathan Peyton30419822017-05-12 18:01:32 +00001571 thread = __kmp_threads[gtid];
1572 taskdata = thread->th.th_current_task;
1573// Should we model this as a task wait or not?
1574// Debugger: The taskwait is active. Store location and thread encountered the
1575// taskwait.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001577// Note: These values are used by ITT events as well.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001578#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001579 taskdata->td_taskwait_counter += 1;
1580 taskdata->td_taskwait_ident = loc_ref;
1581 taskdata->td_taskwait_thread = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001582
1583#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001584 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1585 if (itt_sync_obj != NULL)
1586 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001588 if (!taskdata->td_flags.team_serial) {
1589 kmp_task_team_t *task_team = thread->th.th_task_team;
1590 if (task_team != NULL) {
1591 if (KMP_TASKING_ENABLED(task_team)) {
1592 __kmp_execute_tasks_32(
1593 thread, gtid, NULL, FALSE,
1594 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1595 __kmp_task_stealing_constraint);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001596 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001597 }
1598 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001599#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001600 if (itt_sync_obj != NULL)
1601 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001602#endif /* USE_ITT_BUILD */
1603
Jonathan Peyton30419822017-05-12 18:01:32 +00001604 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
1605 // end of wait?
1606 // Debugger: The taskwait is completed. Location remains, but thread is
1607 // negated.
1608 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
1609 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001610
Jonathan Peyton30419822017-05-12 18:01:32 +00001611 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1612 "returning TASK_CURRENT_NOT_QUEUED\n",
1613 gtid, taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001614
Jonathan Peyton30419822017-05-12 18:01:32 +00001615 return TASK_CURRENT_NOT_QUEUED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001616}
1617
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001618// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
1619#if OMP_45_ENABLED
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001620// Task Reduction implementation
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001621
// Per-item flags supplied by the compiler for task reductions.
typedef struct kmp_task_red_flags {
  unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects)
  unsigned reserved31 : 31; // unused padding; keeps the bitfield 32 bits wide
} kmp_task_red_flags_t;
1626
1627// internal structure for reduction data item related info
// internal structure for reduction data item related info
// One entry per reduction item; an array of these hangs off
// kmp_taskgroup_t::reduce_data.
typedef struct kmp_task_red_data {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item (rounded up to a cache line)
  void *reduce_priv; // thread specific data
  void *reduce_pend; // end of private data for comparison op
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_data_t;
1638
1639// structure sent us by compiler - one per reduction item
// structure sent us by compiler - one per reduction item
// NOTE: this layout is ABI with the compiler; do not reorder fields.
typedef struct kmp_task_red_input {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_input_t;
1648
1649/*!
1650@ingroup TASKING
1651@param gtid Global thread ID
1652@param num Number of data items to reduce
1653@param data Array of data for reduction
1654@return The taskgroup identifier
1655
1656Initialize task reduction for the taskgroup.
1657*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001658void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
1659 kmp_info_t *thread = __kmp_threads[gtid];
1660 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
1661 kmp_int32 nth = thread->th.th_team_nproc;
1662 kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
1663 kmp_task_red_data_t *arr;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001664
Jonathan Peyton30419822017-05-12 18:01:32 +00001665 // check input data just in case
1666 KMP_ASSERT(tg != NULL);
1667 KMP_ASSERT(data != NULL);
1668 KMP_ASSERT(num > 0);
1669 if (nth == 1) {
1670 KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
1671 gtid, tg));
1672 return (void *)tg;
1673 }
1674 KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
1675 gtid, tg, num));
1676 arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
1677 thread, num * sizeof(kmp_task_red_data_t));
1678 for (int i = 0; i < num; ++i) {
1679 void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init);
1680 size_t size = input[i].reduce_size - 1;
1681 // round the size up to cache line per thread-specific item
1682 size += CACHE_LINE - size % CACHE_LINE;
1683 KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
1684 arr[i].reduce_shar = input[i].reduce_shar;
1685 arr[i].reduce_size = size;
1686 arr[i].reduce_init = input[i].reduce_init;
1687 arr[i].reduce_fini = input[i].reduce_fini;
1688 arr[i].reduce_comb = input[i].reduce_comb;
1689 arr[i].flags = input[i].flags;
1690 if (!input[i].flags.lazy_priv) {
1691 // allocate cache-line aligned block and fill it with zeros
1692 arr[i].reduce_priv = __kmp_allocate(nth * size);
1693 arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
1694 if (f_init != NULL) {
1695 // initialize thread-specific items
1696 for (int j = 0; j < nth; ++j) {
1697 f_init((char *)(arr[i].reduce_priv) + j * size);
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001698 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001699 }
1700 } else {
1701 // only allocate space for pointers now,
1702 // objects will be lazily allocated/initialized once requested
1703 arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001704 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001705 }
1706 tg->reduce_data = (void *)arr;
1707 tg->reduce_num_data = num;
1708 return (void *)tg;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001709}
1710
1711/*!
1712@ingroup TASKING
1713@param gtid Global thread ID
1714@param tskgrp The taskgroup ID (optional)
1715@param data Shared location of the item
1716@return The pointer to per-thread data
1717
1718Get thread-specific location of data item
1719*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001720void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
1721 kmp_info_t *thread = __kmp_threads[gtid];
1722 kmp_int32 nth = thread->th.th_team_nproc;
1723 if (nth == 1)
1724 return data; // nothing to do
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001725
Jonathan Peyton30419822017-05-12 18:01:32 +00001726 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
1727 if (tg == NULL)
1728 tg = thread->th.th_current_task->td_taskgroup;
1729 KMP_ASSERT(tg != NULL);
1730 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
1731 kmp_int32 num = tg->reduce_num_data;
1732 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001733
Jonathan Peyton30419822017-05-12 18:01:32 +00001734 KMP_ASSERT(data != NULL);
1735 while (tg != NULL) {
1736 for (int i = 0; i < num; ++i) {
1737 if (!arr[i].flags.lazy_priv) {
1738 if (data == arr[i].reduce_shar ||
1739 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
1740 return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
1741 } else {
1742 // check shared location first
1743 void **p_priv = (void **)(arr[i].reduce_priv);
1744 if (data == arr[i].reduce_shar)
1745 goto found;
1746 // check if we get some thread specific location as parameter
1747 for (int j = 0; j < nth; ++j)
1748 if (data == p_priv[j])
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001749 goto found;
Jonathan Peyton30419822017-05-12 18:01:32 +00001750 continue; // not found, continue search
1751 found:
1752 if (p_priv[tid] == NULL) {
1753 // allocate thread specific object lazily
1754 void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init);
1755 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
1756 if (f_init != NULL) {
1757 f_init(p_priv[tid]);
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001758 }
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001759 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001760 return p_priv[tid];
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001761 }
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001762 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001763 tg = tg->parent;
1764 arr = (kmp_task_red_data_t *)(tg->reduce_data);
1765 num = tg->reduce_num_data;
1766 }
1767 KMP_ASSERT2(0, "Unknown task reduction item");
1768 return NULL; // ERROR, this line never executed
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001769}
1770
1771// Finalize task reduction.
1772// Called from __kmpc_end_taskgroup()
Jonathan Peyton30419822017-05-12 18:01:32 +00001773static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
1774 kmp_int32 nth = th->th.th_team_nproc;
1775 KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
1776 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
1777 kmp_int32 num = tg->reduce_num_data;
1778 for (int i = 0; i < num; ++i) {
1779 void *sh_data = arr[i].reduce_shar;
1780 void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
1781 void (*f_comb)(void *, void *) =
1782 (void (*)(void *, void *))(arr[i].reduce_comb);
1783 if (!arr[i].flags.lazy_priv) {
1784 void *pr_data = arr[i].reduce_priv;
1785 size_t size = arr[i].reduce_size;
1786 for (int j = 0; j < nth; ++j) {
1787 void *priv_data = (char *)pr_data + j * size;
1788 f_comb(sh_data, priv_data); // combine results
1789 if (f_fini)
1790 f_fini(priv_data); // finalize if needed
1791 }
1792 } else {
1793 void **pr_data = (void **)(arr[i].reduce_priv);
1794 for (int j = 0; j < nth; ++j) {
1795 if (pr_data[j] != NULL) {
1796 f_comb(sh_data, pr_data[j]); // combine results
1797 if (f_fini)
1798 f_fini(pr_data[j]); // finalize if needed
1799 __kmp_free(pr_data[j]);
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001800 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001801 }
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001802 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001803 __kmp_free(arr[i].reduce_priv);
1804 }
1805 __kmp_thread_free(th, arr);
1806 tg->reduce_data = NULL;
1807 tg->reduce_num_data = 0;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001808}
1809#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810
1811#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812// __kmpc_taskgroup: Start a new taskgroup
Jonathan Peyton30419822017-05-12 18:01:32 +00001813void __kmpc_taskgroup(ident_t *loc, int gtid) {
1814 kmp_info_t *thread = __kmp_threads[gtid];
1815 kmp_taskdata_t *taskdata = thread->th.th_current_task;
1816 kmp_taskgroup_t *tg_new =
1817 (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
1818 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
1819 tg_new->count = 0;
1820 tg_new->cancel_request = cancel_noreq;
1821 tg_new->parent = taskdata->td_taskgroup;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001822// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
1823#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001824 tg_new->reduce_data = NULL;
1825 tg_new->reduce_num_data = 0;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001826#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001827 taskdata->td_taskgroup = tg_new;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001828}
1829
Jim Cownie5e8470a2013-09-27 10:38:44 +00001830// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1831// and its descendants are complete
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE; // in/out bookkeeping flag for execute_tasks

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  KMP_DEBUG_ASSERT(taskgroup != NULL);
  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
#if USE_ITT_BUILD
    // For ITT the taskgroup wait is similar to taskwait until we need to
    // distinguish them
    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

#if OMP_45_ENABLED
    // Even a serialized team must wait here if proxy tasks were found: they
    // may still be completing asynchronously.
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks))
#else
    if (!taskdata->td_flags.team_serial)
#endif
    {
      // Spin on the taskgroup's count of incomplete tasks, helping to
      // execute queued tasks while waiting for it to drop to zero.
      kmp_flag_32 flag(&(taskgroup->count), 0U);
      while (TCR_4(taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }

#if USE_ITT_BUILD
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
  }
  KMP_DEBUG_ASSERT(taskgroup->count == 0);

// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
#if OMP_45_ENABLED
  if (taskgroup->reduce_data != NULL) // need to reduce?
    __kmp_task_reduction_fini(thread, taskgroup);
#endif
  // Restore parent taskgroup for the current task
  taskdata->td_taskgroup = taskgroup->parent;
  __kmp_thread_free(thread, taskgroup);

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));
  ANNOTATE_HAPPENS_AFTER(taskdata);
}
1887#endif
1888
Jim Cownie5e8470a2013-09-27 10:38:44 +00001889// __kmp_remove_my_task: remove a task from my own deque
Jonathan Peyton30419822017-05-12 18:01:32 +00001890static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
1891 kmp_task_team_t *task_team,
1892 kmp_int32 is_constrained) {
1893 kmp_task_t *task;
1894 kmp_taskdata_t *taskdata;
1895 kmp_thread_data_t *thread_data;
1896 kmp_uint32 tail;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001897
Jonathan Peyton30419822017-05-12 18:01:32 +00001898 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
1899 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
1900 NULL); // Caller should check this condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901
Jonathan Peyton30419822017-05-12 18:01:32 +00001902 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001903
Jonathan Peyton30419822017-05-12 18:01:32 +00001904 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1905 gtid, thread_data->td.td_deque_ntasks,
1906 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001907
Jonathan Peyton30419822017-05-12 18:01:32 +00001908 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
1909 KA_TRACE(10,
1910 ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
1911 "ntasks=%d head=%u tail=%u\n",
1912 gtid, thread_data->td.td_deque_ntasks,
1913 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1914 return NULL;
1915 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001916
Jonathan Peyton30419822017-05-12 18:01:32 +00001917 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
1918
1919 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
1920 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
1921 KA_TRACE(10,
1922 ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
1923 "ntasks=%d head=%u tail=%u\n",
1924 gtid, thread_data->td.td_deque_ntasks,
1925 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1926 return NULL;
1927 }
1928
1929 tail = (thread_data->td.td_deque_tail - 1) &
1930 TASK_DEQUE_MASK(thread_data->td); // Wrap index.
1931 taskdata = thread_data->td.td_deque[tail];
1932
1933 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
1934 // we need to check if the candidate obeys task scheduling constraint:
1935 // only child of current task can be scheduled
1936 kmp_taskdata_t *current = thread->th.th_current_task;
1937 kmp_int32 level = current->td_level;
1938 kmp_taskdata_t *parent = taskdata->td_parent;
1939 while (parent != current && parent->td_level > level) {
1940 parent = parent->td_parent; // check generation up to the level of the
1941 // current task
1942 KMP_DEBUG_ASSERT(parent != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001943 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001944 if (parent != current) {
1945 // If the tail task is not a child, then no other child can appear in the
1946 // deque.
1947 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
1948 KA_TRACE(10,
1949 ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
1950 "ntasks=%d head=%u tail=%u\n",
1951 gtid, thread_data->td.td_deque_ntasks,
1952 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1953 return NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001955 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001956
Jonathan Peyton30419822017-05-12 18:01:32 +00001957 thread_data->td.td_deque_tail = tail;
1958 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959
Jonathan Peyton30419822017-05-12 18:01:32 +00001960 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001961
Jonathan Peyton30419822017-05-12 18:01:32 +00001962 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: "
1963 "ntasks=%d head=%u tail=%u\n",
1964 gtid, taskdata, thread_data->td.td_deque_ntasks,
1965 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001966
Jonathan Peyton30419822017-05-12 18:01:32 +00001967 task = KMP_TASKDATA_TO_TASK(taskdata);
1968 return task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969}
1970
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971// __kmp_steal_task: remove a task from another thread's deque
1972// Assume that calling thread has already checked existence of
1973// task_team thread_data before calling this routine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001974static kmp_task_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00001975__kmp_steal_task(kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1976 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1977 kmp_int32 is_constrained)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001978{
Jonathan Peyton30419822017-05-12 18:01:32 +00001979 kmp_task_t *task;
1980 kmp_taskdata_t *taskdata;
1981 kmp_thread_data_t *victim_td, *threads_data;
1982 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001983
Jonathan Peyton30419822017-05-12 18:01:32 +00001984 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985
Jonathan Peyton30419822017-05-12 18:01:32 +00001986 threads_data = task_team->tt.tt_threads_data;
1987 KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988
Jonathan Peyton30419822017-05-12 18:01:32 +00001989 victim_tid = victim->th.th_info.ds.ds_tid;
1990 victim_td = &threads_data[victim_tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991
Jonathan Peyton30419822017-05-12 18:01:32 +00001992 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
1993 "task_team=%p ntasks=%d "
1994 "head=%u tail=%u\n",
1995 gtid, __kmp_gtid_from_thread(victim), task_team,
1996 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1997 victim_td->td.td_deque_tail));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998
Jonathan Peyton30419822017-05-12 18:01:32 +00001999 if ((TCR_4(victim_td->td.td_deque_ntasks) ==
2000 0) || // Caller should not check this condition
2001 (TCR_PTR(victim->th.th_task_team) !=
2002 task_team)) // GEH: why would this happen?
2003 {
2004 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
2005 "task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006 "ntasks=%d head=%u tail=%u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00002007 gtid, __kmp_gtid_from_thread(victim), task_team,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
Jonathan Peyton30419822017-05-12 18:01:32 +00002009 victim_td->td.td_deque_tail));
2010 return NULL;
2011 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012
Jonathan Peyton30419822017-05-12 18:01:32 +00002013 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2014
2015 // Check again after we acquire the lock
2016 if ((TCR_4(victim_td->td.td_deque_ntasks) == 0) ||
2017 (TCR_PTR(victim->th.th_task_team) !=
2018 task_team)) // GEH: why would this happen?
2019 {
2020 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2021 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
2022 "task_team=%p "
2023 "ntasks=%d head=%u tail=%u\n",
2024 gtid, __kmp_gtid_from_thread(victim), task_team,
2025 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2026 victim_td->td.td_deque_tail));
2027 return NULL;
2028 }
2029
2030 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2031
2032 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2033 if (is_constrained) {
2034 // we need to check if the candidate obeys task scheduling constraint:
2035 // only descendant of current task can be scheduled
2036 kmp_taskdata_t *current = __kmp_threads[gtid]->th.th_current_task;
2037 kmp_int32 level = current->td_level;
2038 kmp_taskdata_t *parent = taskdata->td_parent;
2039 while (parent != current && parent->td_level > level) {
2040 parent = parent->td_parent; // check generation up to the level of the
2041 // current task
2042 KMP_DEBUG_ASSERT(parent != NULL);
2043 }
2044 if (parent != current) {
2045 // If the head task is not a descendant of the current task then do not
2046 // steal it. No other task in victim's deque can be a descendant of the
2047 // current task.
2048 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2049 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from "
2050 "T#%d: task_team=%p "
2051 "ntasks=%d head=%u tail=%u\n",
2052 gtid,
2053 __kmp_gtid_from_thread(threads_data[victim_tid].td.td_thr),
2054 task_team, victim_td->td.td_deque_ntasks,
2055 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2056 return NULL;
2057 }
2058 }
2059 // Bump head pointer and Wrap.
2060 victim_td->td.td_deque_head =
2061 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
2062 if (*thread_finished) {
2063 // We need to un-mark this victim as a finished victim. This must be done
2064 // before releasing the lock, or else other threads (starting with the
2065 // master victim) might be prematurely released from the barrier!!!
2066 kmp_uint32 count;
2067
2068 count = KMP_TEST_THEN_INC32((kmp_int32 *)unfinished_threads);
2069
2070 KA_TRACE(
2071 20,
2072 ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
2073 gtid, count + 1, task_team));
2074
2075 *thread_finished = FALSE;
2076 }
2077 TCW_4(victim_td->td.td_deque_ntasks,
2078 TCR_4(victim_td->td.td_deque_ntasks) - 1);
2079
2080
2081 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2082
2083 KMP_COUNT_BLOCK(TASK_stolen);
2084 KA_TRACE(
2085 10,
2086 ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
2087 "ntasks=%d head=%u tail=%u\n",
2088 gtid, taskdata, __kmp_gtid_from_thread(victim), task_team,
2089 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2090 victim_td->td.td_deque_tail));
2091
2092 task = KMP_TASKDATA_TO_TASK(taskdata);
2093 return task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002094}
2095
2096
Jonathan Peyton30419822017-05-12 18:01:32 +00002097// __kmp_execute_tasks_template: Choose and execute tasks until either the
2098// condition is statisfied (return true) or there are none left (return false).
2099//
Jim Cownie5e8470a2013-09-27 10:38:44 +00002100// final_spin is TRUE if this is the spin at the release barrier.
2101// thread_finished indicates whether the thread is finished executing all
2102// the tasks it has on its deque, and is at the release barrier.
2103// spinner is the location on which to spin.
2104// spinner == NULL means only execute a single task and return.
2105// checker is the value to check to terminate the spin.
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
  kmp_info_t *other_thread;
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  volatile kmp_uint32 *unfinished_threads;
  // victim == -2: no victim tried yet; victim == -1: no recorded last-stolen
  // victim; otherwise the tid of the victim to try first.
  kmp_int32 nthreads, victim = -2, use_own_tasks = 1, new_victim = 0,
            tid = thread->th.th_info.ds.ds_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);

  // No task team set up => nothing to execute.
  if (task_team == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  nthreads = task_team->tt.tt_nproc;
  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
#if OMP_45_ENABLED
  KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
#else
  KMP_DEBUG_ASSERT(nthreads > 1);
#endif
  KMP_DEBUG_ASSERT((int)(TCR_4(*unfinished_threads)) >= 0);

  while (1) { // Outer loop keeps trying to find tasks in case of single thread
    // getting tasks from target constructs
    while (1) { // Inner loop to find a task and execute it
      task = NULL;
      if (use_own_tasks) { // check on own queue first
        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
      }
      if ((task == NULL) && (nthreads > 1)) { // Steal a task
        int asleep = 1;
        use_own_tasks = 0;
        // Try to steal from the last place I stole from successfully.
        if (victim == -2) { // haven't stolen anything yet
          victim = threads_data[tid].td.td_deque_last_stolen;
          if (victim !=
              -1) // if we have a last stolen from victim, get the thread
            other_thread = threads_data[victim].td.td_thr;
        }
        if (victim != -1) { // found last victim
          asleep = 0;
        } else if (!new_victim) { // no recent steals and we haven't already
          // used a new victim; select a random thread
          do { // Find a different thread to steal work from.
            // Pick a random thread. Initial plan was to cycle through all the
            // threads, and only return if we tried to steal from every thread,
            // and failed. Arch says that's not such a great idea.
            victim = __kmp_get_random(thread) % (nthreads - 1);
            if (victim >= tid) {
              ++victim; // Adjusts random distribution to exclude self
            }
            // Found a potential victim
            other_thread = threads_data[victim].td.td_thr;
            // There is a slight chance that __kmp_enable_tasking() did not wake
            // up all threads waiting at the barrier. If victim is sleeping,
            // then wake it up. Since we were going to pay the cache miss
            // penalty for referencing another thread's kmp_info_t struct
            // anyway,
            // the check shouldn't cost too much performance at this point. In
            // extra barrier mode, tasks do not sleep at the separate tasking
            // barrier, so this isn't a problem.
            asleep = 0;
            if ((__kmp_tasking_mode == tskm_task_teams) &&
                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
                (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
              asleep = 1;
              __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
                                        other_thread->th.th_sleep_loc);
              // A sleeping thread should not have any tasks on it's queue.
              // There is a slight possibility that it resumes, steals a task
              // from another thread, which spawns more tasks, all in the time
              // that it takes this thread to check => don't write an assertion
              // that the victim's queue is empty. Try stealing from a
              // different thread.
            }
          } while (asleep);
        }

        if (!asleep) {
          // We have a victim to try to steal from
          task = __kmp_steal_task(other_thread, gtid, task_team,
                                  unfinished_threads, thread_finished,
                                  is_constrained);
        }
        if (task != NULL) { // set last stolen to victim
          if (threads_data[tid].td.td_deque_last_stolen != victim) {
            threads_data[tid].td.td_deque_last_stolen = victim;
            // The pre-refactored code did not try more than 1 successful new
            // vicitm, unless the last one generated more local tasks;
            // new_victim keeps track of this
            new_victim = 1;
          }
        } else { // No tasks found; unset last_stolen
          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
          victim = -2; // no successful victim found
        }
      }

      if (task == NULL) // break out of tasking loop
        break;

// Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
          // get the object reliably
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
      // If this thread is only partway through the barrier and the condition is
      // met, then return now, so that the barrier gather/release pattern can
      // proceed. If this thread is in the last spin loop in the barrier,
      // waiting to be released, we know that the termination condition will not
      // be satisified, so don't waste any cycles checking it.
      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      // Yield before executing next task
      KMP_YIELD(__kmp_library == library_throughput);
      // If execution of a stolen task results in more tasks being placed on our
      // run queue, reset use_own_tasks
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    }

// The task source has been exhausted. If in final spin loop of barrier, check
// if termination condition is satisfied.
#if OMP_45_ENABLED
    // The work queue may be empty but there might be proxy tasks still
    // executing
    if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
#else
    if (final_spin)
#endif
    {
      // First, decrement the #unfinished threads, if that has not already been
      // done. This decrement might be to the spin location, and result in the
      // termination condition being satisfied.
      if (!*thread_finished) {
        kmp_uint32 count;

        count = KMP_TEST_THEN_DEC32((kmp_int32 *)unfinished_threads) - 1;
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }

      // It is now unsafe to reference thread->th.th_team !!!
      // Decrementing task_team->tt.tt_unfinished_threads can allow the master
      // thread to pass through the barrier, where it might reset each thread's
      // th.th_team field for the next parallel region. If we can steal more
      // work, we know that this has not happened yet.
      if (flag != NULL && flag->done_check()) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
    }

    // If this thread's task team is NULL, master has recognized that there are
    // no more tasks; bail out
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }

#if OMP_45_ENABLED
    // We could be getting tasks from target constructs; if this is the only
    // thread, keep trying to execute tasks from own queue
    if (nthreads == 1)
      use_own_tasks = 1;
    else
#endif
    {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
      return FALSE;
    }
  }
}
2324
Jonathan Peyton30419822017-05-12 18:01:32 +00002325int __kmp_execute_tasks_32(
2326 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
2327 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
2328 kmp_int32 is_constrained) {
2329 return __kmp_execute_tasks_template(
2330 thread, gtid, flag, final_spin,
2331 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002332}
2333
Jonathan Peyton30419822017-05-12 18:01:32 +00002334int __kmp_execute_tasks_64(
2335 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2336 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
2337 kmp_int32 is_constrained) {
2338 return __kmp_execute_tasks_template(
2339 thread, gtid, flag, final_spin,
2340 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002341}
2342
Jonathan Peyton30419822017-05-12 18:01:32 +00002343int __kmp_execute_tasks_oncore(
2344 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2345 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
2346 kmp_int32 is_constrained) {
2347 return __kmp_execute_tasks_template(
2348 thread, gtid, flag, final_spin,
2349 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002350}
2351
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2353// next barrier so they can assist in executing enqueued tasks.
2354// First thread in allocates the task team atomically.
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr) {
  // Set up the task team's per-thread data and, if this thread is the one
  // that performed the initialization, wake any team members sleeping at the
  // barrier so they can help execute enqueued tasks.
  kmp_thread_data_t *threads_data;
  int nthreads, i, is_init_thread;

  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));

  KMP_DEBUG_ASSERT(task_team != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);

  nthreads = task_team->tt.tt_nproc;
  KMP_DEBUG_ASSERT(nthreads > 0);
  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);

  // Allocate or increase the size of threads_data if necessary
  is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);

  if (!is_init_thread) {
    // Some other thread already set up the array.
    KA_TRACE(
        20,
        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
         __kmp_gtid_from_thread(this_thr)));
    return;
  }
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  if ((__kmp_tasking_mode == tskm_task_teams) &&
      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
    // Release any threads sleeping at the barrier, so that they can steal
    // tasks and execute them. In extra barrier mode, tasks do not sleep
    // at the separate tasking barrier, so this isn't a problem.
    for (i = 0; i < nthreads; i++) {
      volatile void *sleep_loc;
      kmp_info_t *thread = threads_data[i].td.td_thr;

      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue; // no need to wake ourselves
      }
      // Since we haven't locked the thread's suspend mutex lock at this
      // point, there is a small window where a thread might be putting
      // itself to sleep, but hasn't set the th_sleep_loc field yet.
      // To work around this, __kmp_execute_tasks_template() periodically checks
      // see if other threads are sleeping (using the same random mechanism that
      // is used for task stealing) and awakens them if they are.
      if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }
  }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}
2418
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002419/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002420 * Utility routines for "task teams". A task team (kmp_task_t) is kind of
2421 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2422 * After a child * thread checks into a barrier and calls __kmp_release() from
2423 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2424 * longer assume that the kmp_team_t structure is intact (at any moment, the
2425 * master thread may exit the barrier code and free the team data structure,
2426 * and return the threads to the thread pool).
2427 *
 * This does not work with the tasking code, as the thread is still
 * expected to participate in the execution of any tasks that may have been
 * spawned by a member of the team, and the thread still needs access to
 * each thread in the team, so that it can steal work from it.
2432 *
2433 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 * counting mechanism, and is allocated by the master thread before calling
 * __kmp_<barrier_kind>_release, and then is released by the last thread to
2436 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2437 * of the kmp_task_team_t structs for consecutive barriers can overlap
2438 * (and will, unless the master thread is the last thread to exit the barrier
2439 * release phase, which is not typical).
2440 *
2441 * The existence of such a struct is useful outside the context of tasking,
2442 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2443 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2444 * libraries.
2445 *
2446 * We currently use the existence of the threads array as an indicator that
2447 * tasks were spawned since the last barrier. If the structure is to be
2448 * useful outside the context of tasking, then this will have to change, but
 * not setting the field minimizes the performance impact of tasking on
2450 * barriers, when no explicit tasks were spawned (pushed, actually).
2451 */
2452
// Global free list of recycled kmp_task_team_t structs: entries are pushed by
// __kmp_free_task_team, popped by __kmp_allocate_task_team, and finally
// destroyed by __kmp_reap_task_teams at shutdown. Guarded by
// __kmp_task_team_lock below.
static kmp_task_team_t *__kmp_free_task_teams =
    NULL; // Free list for task_team data structures
// Lock for task team data structures (protects the free list above)
static kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002458
Jim Cownie5e8470a2013-09-27 10:38:44 +00002459// __kmp_alloc_task_deque:
2460// Allocates a task deque for a particular thread, and initialize the necessary
2461// data structures relating to the deque. This only happens once per thread
Jonathan Peyton30419822017-05-12 18:01:32 +00002462// per task team since task teams are recycled. No lock is needed during
2463// allocation since each thread allocates its own deque.
2464static void __kmp_alloc_task_deque(kmp_info_t *thread,
2465 kmp_thread_data_t *thread_data) {
2466 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
2467 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002468
Jonathan Peyton30419822017-05-12 18:01:32 +00002469 // Initialize last stolen task field to "none"
2470 thread_data->td.td_deque_last_stolen = -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002471
Jonathan Peyton30419822017-05-12 18:01:32 +00002472 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
2473 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
2474 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002475
Jonathan Peyton30419822017-05-12 18:01:32 +00002476 KE_TRACE(
2477 10,
2478 ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2479 __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
2480 // Allocate space for task deque, and zero the deque
2481 // Cannot use __kmp_thread_calloc() because threads not around for
2482 // kmp_reap_task_team( ).
2483 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
2484 INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2485 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002486}
2487
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002488// __kmp_realloc_task_deque:
Jonathan Peyton30419822017-05-12 18:01:32 +00002489// Re-allocates a task deque for a particular thread, copies the content from
2490// the old deque and adjusts the necessary data structures relating to the
2491// deque. This operation must be done with a the deque_lock being held
2492static void __kmp_realloc_task_deque(kmp_info_t *thread,
2493 kmp_thread_data_t *thread_data) {
2494 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2495 kmp_int32 new_size = 2 * size;
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002496
Jonathan Peyton30419822017-05-12 18:01:32 +00002497 KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
2498 "%d] for thread_data %p\n",
2499 __kmp_gtid_from_thread(thread), size, new_size, thread_data));
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002500
Jonathan Peyton30419822017-05-12 18:01:32 +00002501 kmp_taskdata_t **new_deque =
2502 (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002503
Jonathan Peyton30419822017-05-12 18:01:32 +00002504 int i, j;
2505 for (i = thread_data->td.td_deque_head, j = 0; j < size;
2506 i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
2507 new_deque[j] = thread_data->td.td_deque[i];
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002508
Jonathan Peyton30419822017-05-12 18:01:32 +00002509 __kmp_free(thread_data->td.td_deque);
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002510
Jonathan Peyton30419822017-05-12 18:01:32 +00002511 thread_data->td.td_deque_head = 0;
2512 thread_data->td.td_deque_tail = size;
2513 thread_data->td.td_deque = new_deque;
2514 thread_data->td.td_deque_size = new_size;
Jonathan Peytonf4f96952016-05-31 19:07:00 +00002515}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002516
Jim Cownie5e8470a2013-09-27 10:38:44 +00002517// __kmp_free_task_deque:
Jonathan Peyton30419822017-05-12 18:01:32 +00002518// Deallocates a task deque for a particular thread. Happens at library
2519// deallocation so don't need to reset all thread data fields.
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  // Frees one thread's task deque at library shutdown. The deque lock is
  // taken so any late reader sees a consistent (empty) state.
  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  if (thread_data->td.td_deque != NULL) {
    // Publish a zero task count before freeing the storage.
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
  }
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

#ifdef BUILD_TIED_TASK_STACK
  // GEH: Figure out what to do here for td_susp_tied_tasks
  // NOTE(review): 'gtid' is not declared in this function, so this branch
  // cannot compile when BUILD_TIED_TASK_STACK is defined -- verify and fix
  // before enabling that configuration.
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}
2537
Jim Cownie5e8470a2013-09-27 10:38:44 +00002538// __kmp_realloc_task_threads_data:
Jonathan Peyton30419822017-05-12 18:01:32 +00002539// Allocates a threads_data array for a task team, either by allocating an
2540// initial array or enlarging an existing array. Only the first thread to get
// the lock allocs or enlarges the array and re-initializes the array elements.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002542// That thread returns "TRUE", the rest return "FALSE".
2543// Assumes that the new array size is given by task_team -> tt.tt_nproc.
2544// The current size is given by task_team -> tt.tt_max_threads.
Jonathan Peyton30419822017-05-12 18:01:32 +00002545static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
2546 kmp_task_team_t *task_team) {
2547 kmp_thread_data_t **threads_data_p;
2548 kmp_int32 nthreads, maxthreads;
2549 int is_init_thread = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002550
Jonathan Peyton30419822017-05-12 18:01:32 +00002551 if (TCR_4(task_team->tt.tt_found_tasks)) {
2552 // Already reallocated and initialized.
2553 return FALSE;
2554 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002555
Jonathan Peyton30419822017-05-12 18:01:32 +00002556 threads_data_p = &task_team->tt.tt_threads_data;
2557 nthreads = task_team->tt.tt_nproc;
2558 maxthreads = task_team->tt.tt_max_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002559
Jonathan Peyton30419822017-05-12 18:01:32 +00002560 // All threads must lock when they encounter the first task of the implicit
2561 // task region to make sure threads_data fields are (re)initialized before
2562 // used.
2563 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002564
Jonathan Peyton30419822017-05-12 18:01:32 +00002565 if (!TCR_4(task_team->tt.tt_found_tasks)) {
2566 // first thread to enable tasking
2567 kmp_team_t *team = thread->th.th_team;
2568 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002569
Jonathan Peyton30419822017-05-12 18:01:32 +00002570 is_init_thread = TRUE;
2571 if (maxthreads < nthreads) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002572
Jonathan Peyton30419822017-05-12 18:01:32 +00002573 if (*threads_data_p != NULL) {
2574 kmp_thread_data_t *old_data = *threads_data_p;
2575 kmp_thread_data_t *new_data = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002576
Jonathan Peyton30419822017-05-12 18:01:32 +00002577 KE_TRACE(
2578 10,
2579 ("__kmp_realloc_task_threads_data: T#%d reallocating "
2580 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2581 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
2582 // Reallocate threads_data to have more elements than current array
2583 // Cannot use __kmp_thread_realloc() because threads not around for
2584 // kmp_reap_task_team( ). Note all new array entries are initialized
2585 // to zero by __kmp_allocate().
2586 new_data = (kmp_thread_data_t *)__kmp_allocate(
2587 nthreads * sizeof(kmp_thread_data_t));
2588 // copy old data to new data
2589 KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
2590 (void *)old_data, maxthreads * sizeof(kmp_taskdata_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002591
2592#ifdef BUILD_TIED_TASK_STACK
Jonathan Peyton30419822017-05-12 18:01:32 +00002593 // GEH: Figure out if this is the right thing to do
2594 for (i = maxthreads; i < nthreads; i++) {
2595 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
2596 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
2597 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002598#endif // BUILD_TIED_TASK_STACK
Jonathan Peyton30419822017-05-12 18:01:32 +00002599 // Install the new data and free the old data
2600 (*threads_data_p) = new_data;
2601 __kmp_free(old_data);
2602 } else {
2603 KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
2604 "threads data for task_team %p, size = %d\n",
2605 __kmp_gtid_from_thread(thread), task_team, nthreads));
2606 // Make the initial allocate for threads_data array, and zero entries
2607 // Cannot use __kmp_thread_calloc() because threads not around for
2608 // kmp_reap_task_team( ).
2609 ANNOTATE_IGNORE_WRITES_BEGIN();
2610 *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
2611 nthreads * sizeof(kmp_thread_data_t));
2612 ANNOTATE_IGNORE_WRITES_END();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613#ifdef BUILD_TIED_TASK_STACK
Jonathan Peyton30419822017-05-12 18:01:32 +00002614 // GEH: Figure out if this is the right thing to do
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615 for (i = 0; i < nthreads; i++) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002616 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
2617 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002618 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002619#endif // BUILD_TIED_TASK_STACK
2620 }
2621 task_team->tt.tt_max_threads = nthreads;
2622 } else {
2623 // If array has (more than) enough elements, go ahead and use it
2624 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002625 }
2626
Jonathan Peyton30419822017-05-12 18:01:32 +00002627 // initialize threads_data pointers back to thread_info structures
2628 for (i = 0; i < nthreads; i++) {
2629 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
2630 thread_data->td.td_thr = team->t.t_threads[i];
2631
2632 if (thread_data->td.td_deque_last_stolen >= nthreads) {
2633 // The last stolen field survives across teams / barrier, and the number
2634 // of threads may have changed. It's possible (likely?) that a new
2635 // parallel region will exhibit the same behavior as previous region.
2636 thread_data->td.td_deque_last_stolen = -1;
2637 }
2638 }
2639
2640 KMP_MB();
2641 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
2642 }
2643
2644 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
2645 return is_init_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002646}
2647
Jim Cownie5e8470a2013-09-27 10:38:44 +00002648// __kmp_free_task_threads_data:
2649// Deallocates a threads_data array for a task team, including any attached
2650// tasking deques. Only occurs at library shutdown.
Jonathan Peyton30419822017-05-12 18:01:32 +00002651static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
2652 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
2653 if (task_team->tt.tt_threads_data != NULL) {
2654 int i;
2655 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
2656 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002657 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002658 __kmp_free(task_team->tt.tt_threads_data);
2659 task_team->tt.tt_threads_data = NULL;
2660 }
2661 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002662}
2663
Jim Cownie5e8470a2013-09-27 10:38:44 +00002664// __kmp_allocate_task_team:
2665// Allocates a task team associated with a specific team, taking it from
Jonathan Peyton30419822017-05-12 18:01:32 +00002666// the global task team free list if possible. Also initializes data
2667// structures.
2668static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
2669 kmp_team_t *team) {
2670 kmp_task_team_t *task_team = NULL;
2671 int nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002672
Jonathan Peyton30419822017-05-12 18:01:32 +00002673 KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
2674 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675
Jonathan Peyton30419822017-05-12 18:01:32 +00002676 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2677 // Take a task team from the task team pool
2678 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
2679 if (__kmp_free_task_teams != NULL) {
2680 task_team = __kmp_free_task_teams;
2681 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
2682 task_team->tt.tt_next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002683 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002684 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
2685 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002686
Jonathan Peyton30419822017-05-12 18:01:32 +00002687 if (task_team == NULL) {
2688 KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
2689 "task team for team %p\n",
2690 __kmp_gtid_from_thread(thread), team));
2691 // Allocate a new task team if one is not available.
2692 // Cannot use __kmp_thread_malloc() because threads not around for
2693 // kmp_reap_task_team( ).
2694 task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
2695 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
2696 // AC: __kmp_allocate zeroes returned memory
2697 // task_team -> tt.tt_threads_data = NULL;
2698 // task_team -> tt.tt_max_threads = 0;
2699 // task_team -> tt.tt_next = NULL;
2700 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002701
Jonathan Peyton30419822017-05-12 18:01:32 +00002702 TCW_4(task_team->tt.tt_found_tasks, FALSE);
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002703#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002704 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002705#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002706 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002707
Jonathan Peyton30419822017-05-12 18:01:32 +00002708 TCW_4(task_team->tt.tt_unfinished_threads, nthreads);
2709 TCW_4(task_team->tt.tt_active, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002710
Jonathan Peyton30419822017-05-12 18:01:32 +00002711 KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
2712 "unfinished_threads init'd to %d\n",
2713 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
2714 task_team->tt.tt_unfinished_threads));
2715 return task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002716}
2717
Jim Cownie5e8470a2013-09-27 10:38:44 +00002718// __kmp_free_task_team:
2719// Frees the task team associated with a specific thread, and adds it
2720// to the global task team free list.
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));

  // Put task team back on free list
  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);

  // The struct must already be unlinked; tt_next is reset to NULL when it is
  // popped in __kmp_allocate_task_team.
  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
  // Push onto the head of the global free list.
  task_team->tt.tt_next = __kmp_free_task_teams;
  TCW_PTR(__kmp_free_task_teams, task_team);

  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
2734
Jim Cownie5e8470a2013-09-27 10:38:44 +00002735// __kmp_reap_task_teams:
2736// Free all the task teams on the task team free list.
2737// Should only be done during library shutdown.
Jonathan Peyton30419822017-05-12 18:01:32 +00002738// Cannot do anything that needs a thread structure or gtid since they are
2739// already gone.
2740void __kmp_reap_task_teams(void) {
2741 kmp_task_team_t *task_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002742
Jonathan Peyton30419822017-05-12 18:01:32 +00002743 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2744 // Free all task_teams on the free list
2745 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
2746 while ((task_team = __kmp_free_task_teams) != NULL) {
2747 __kmp_free_task_teams = task_team->tt.tt_next;
2748 task_team->tt.tt_next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002749
Jonathan Peyton30419822017-05-12 18:01:32 +00002750 // Free threads_data if necessary
2751 if (task_team->tt.tt_threads_data != NULL) {
2752 __kmp_free_task_threads_data(task_team);
2753 }
2754 __kmp_free(task_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002755 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002756 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
2757 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002758}
2759
Jim Cownie5e8470a2013-09-27 10:38:44 +00002760// __kmp_wait_to_unref_task_teams:
2761// Some threads could still be in the fork barrier release code, possibly
2762// trying to steal tasks. Wait for each thread to unreference its task team.
void __kmp_wait_to_unref_task_teams(void) {
  // Spins until no thread in the thread pool still holds a th_task_team
  // pointer, waking sleeping threads so they can drop their reference.
  kmp_info_t *thread;
  kmp_uint32 spins;
  int done;

  KMP_INIT_YIELD(spins);

  for (;;) {
    // Assume finished; any thread still referencing a task team clears this.
    done = TRUE;

    // TODO: GEH - this may be is wrong because some sync would be necessary
    // in case threads are added to the pool during the traversal. Need to
    // verify that lock for thread pool is held when calling this routine.
    for (thread = (kmp_info_t *)__kmp_thread_pool; thread != NULL;
         thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
      DWORD exit_val;
#endif
      // Threads that already dropped their task team need no attention.
      if (TCR_PTR(thread->th.th_task_team) == NULL) {
        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                      __kmp_gtid_from_thread(thread)));
        continue;
      }
#if KMP_OS_WINDOWS
      // TODO: GEH - add this check for Linux* OS / OS X* as well?
      // A dead thread can never unreference its task team; clear it here.
      if (!__kmp_is_thread_alive(thread, &exit_val)) {
        thread->th.th_task_team = NULL;
        continue;
      }
#endif

      done = FALSE; // Because th_task_team pointer is not NULL for this thread

      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));

      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        volatile void *sleep_loc;
        // If the thread is sleeping, awaken it.
        // NOTE(review): the trace below prints the target thread's gtid for
        // both T#%d slots; presumably the first was meant to be the caller.
        if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
        }
      }
    }
    if (done) {
      break;
    }

    // If we are oversubscribed, or have waited a bit (and library mode is
    // throughput), yield. Pause is in the following code.
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
  }
}
2822
Jim Cownie5e8470a2013-09-27 10:38:44 +00002823// __kmp_task_team_setup: Create a task_team for the current team, but use
2824// an already created, unused one if it already exists.
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
  // Prepares the team's two task_team slots (double-buffered by the caller's
  // th_task_state parity). 'always' forces creation even for 1-thread teams.
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // If this task_team hasn't been created yet, allocate it. It will be used in
  // the region after the next.
  // If it exists, it is the current task team and shouldn't be touched yet as
  // it may still be in use.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
      (always || team->t.t_nproc > 1)) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
                  "for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state],
                  ((team != NULL) ? team->t.t_id : -1),
                  this_thr->th.th_task_state));
  }

  // After threads exit the release, they will call sync, and then point to this
  // other task_team; make sure it is allocated and properly initialized. As
  // threads spin in the barrier release phase, they will continue to use the
  // previous task_team struct(above), until they receive the signal to stop
  // checking for tasks (they can't safely reference the kmp_team_t struct,
  // which could be reallocated by the master thread). No task teams are formed
  // for serialized teams.
  if (team->t.t_nproc > 1) {
    // The opposite parity slot is the one threads will switch to at sync.
    int other_team = 1 - this_thr->th.th_task_state;
    if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
      team->t.t_task_team[other_team] =
          __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
                    "task_team %p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    } else { // Leave the old task team struct in place for the upcoming region;
      // adjust as needed
      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
      // Reactivate / resize the recycled struct if it was deactivated or the
      // team size changed since it was last used.
      if (!task_team->tt.tt_active ||
          team->t.t_nproc != task_team->tt.tt_nproc) {
        TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
        TCW_4(task_team->tt.tt_found_tasks, FALSE);
#if OMP_45_ENABLED
        TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#endif
        TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc);
        TCW_4(task_team->tt.tt_active, TRUE);
      }
      // if team size has changed, the first thread to enable tasking will
      // realloc threads_data if necessary
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
                    "%p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    }
  }
}
2884
Jim Cownie5e8470a2013-09-27 10:38:44 +00002885// __kmp_task_team_sync: Propagation of task team data from team to threads
2886// which happens just after the release phase of a team barrier. This may be
2887// called by any thread, but only for teams with # threads > 1.
Jonathan Peyton30419822017-05-12 18:01:32 +00002888void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
2889 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002890
Jonathan Peyton30419822017-05-12 18:01:32 +00002891 // Toggle the th_task_state field, to switch which task_team this thread
2892 // refers to
2893 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2894 // It is now safe to propagate the task team pointer from the team struct to
2895 // the current thread.
2896 TCW_PTR(this_thr->th.th_task_team,
2897 team->t.t_task_team[this_thr->th.th_task_state]);
2898 KA_TRACE(20,
2899 ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
2900 "%p from Team #%d (parity=%d)\n",
2901 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
2902 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002903}
2904
Jonathan Peyton30419822017-05-12 18:01:32 +00002905// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
2906// barrier gather phase. Only called by master thread if #threads in team > 1 or
2907// if proxy tasks were created.
2908//
2909// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
2910// by passing in 0 optionally as the last argument. When wait is zero, master
2911// thread does not wait for unfinished_threads to reach 0.
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
  // Master-only: optionally wait for all tasks of the current task team to
  // finish, then deactivate the team so spinning workers stop using it.
  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);

  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Worker threads may have dropped through to release phase, but could
      // still be executing tasks. Wait here for tasks to complete. To avoid
      // memory contention, only master thread checks termination condition.
      kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    // Deactivate the old task team, so that the worker threads will stop
    // referencing it while spinning.
    KA_TRACE(
        20,
        ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
         "setting active to false, setting local and team's pointer to NULL\n",
         __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_45_ENABLED
    // With proxy tasks a 1-thread team may legitimately have a task team.
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
                     task_team->tt.tt_found_proxy_tasks == TRUE);
    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#else
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
#endif
    TCW_SYNC_4(task_team->tt.tt_active, FALSE);
    // Ensure the deactivation is visible before dropping our reference.
    KMP_MB();

    TCW_PTR(this_thr->th.th_task_team, NULL);
  }
}
2951
Jim Cownie5e8470a2013-09-27 10:38:44 +00002952// __kmp_tasking_barrier:
// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
Jonathan Peyton30419822017-05-12 18:01:32 +00002954// Internal function to execute all tasks prior to a regular barrier or a join
2955// barrier. It is a full barrier itself, which unfortunately turns regular
2956// barriers into double barriers and join barriers into 1 1/2 barriers.
2957void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
2958 volatile kmp_uint32 *spin =
2959 &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
2960 int flag = FALSE;
2961 KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002962
2963#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002964 KMP_FSYNC_SPIN_INIT(spin, (kmp_uint32 *)NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002965#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00002966 kmp_flag_32 spin_flag(spin, 0U);
2967 while (!spin_flag.execute_tasks(thread, gtid, TRUE,
2968 &flag USE_ITT_BUILD_ARG(NULL), 0)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002969#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002970 // TODO: What about itt_sync_obj??
2971 KMP_FSYNC_SPIN_PREPARE(spin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002972#endif /* USE_ITT_BUILD */
2973
Jonathan Peyton30419822017-05-12 18:01:32 +00002974 if (TCR_4(__kmp_global.g.g_done)) {
2975 if (__kmp_global.g.g_abort)
2976 __kmp_abort_thread();
2977 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002978 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002979 KMP_YIELD(TRUE); // GH: We always yield here
2980 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002981#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002982 KMP_FSYNC_SPIN_ACQUIRED((void *)spin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002983#endif /* USE_ITT_BUILD */
2984}
2985
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002986#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002987
Jonathan Peyton30419822017-05-12 18:01:32 +00002988// __kmp_give_task puts a task into a given thread queue if:
2989// - the queue for that thread was created
2990// - there's space in that queue
2991// Because of this, __kmp_push_task needs to check if there's space after
2992// getting the lock
2993static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
2994 kmp_int32 pass) {
2995 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
2996 kmp_task_team_t *task_team = taskdata->td_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002997
Jonathan Peyton30419822017-05-12 18:01:32 +00002998 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
2999 taskdata, tid));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003000
Jonathan Peyton30419822017-05-12 18:01:32 +00003001 // If task_team is NULL something went really bad...
3002 KMP_DEBUG_ASSERT(task_team != NULL);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003003
Jonathan Peyton30419822017-05-12 18:01:32 +00003004 bool result = false;
3005 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003006
Jonathan Peyton30419822017-05-12 18:01:32 +00003007 if (thread_data->td.td_deque == NULL) {
3008 // There's no queue in this thread, go find another one
3009 // We're guaranteed that at least one thread has a queue
3010 KA_TRACE(30,
3011 ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
3012 tid, taskdata));
3013 return result;
3014 }
Jonathan Peyton134f90d2016-02-11 23:07:30 +00003015
Jonathan Peyton30419822017-05-12 18:01:32 +00003016 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3017 TASK_DEQUE_SIZE(thread_data->td)) {
3018 KA_TRACE(
3019 30,
3020 ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
3021 taskdata, tid));
3022
3023 // if this deque is bigger than the pass ratio give a chance to another
3024 // thread
3025 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3026 return result;
3027
3028 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3029 __kmp_realloc_task_deque(thread, thread_data);
3030
3031 } else {
3032
3033 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3034
3035 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3036 TASK_DEQUE_SIZE(thread_data->td)) {
3037 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
3038 "thread %d.\n",
3039 taskdata, tid));
3040
3041 // if this deque is bigger than the pass ratio give a chance to another
3042 // thread
3043 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3044 goto release_and_exit;
3045
3046 __kmp_realloc_task_deque(thread, thread_data);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003047 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003048 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003049
Jonathan Peyton30419822017-05-12 18:01:32 +00003050 // lock is held here, and there is space in the deque
Jonathan Peytonf4f96952016-05-31 19:07:00 +00003051
Jonathan Peyton30419822017-05-12 18:01:32 +00003052 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
3053 // Wrap index.
3054 thread_data->td.td_deque_tail =
3055 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
3056 TCW_4(thread_data->td.td_deque_ntasks,
3057 TCR_4(thread_data->td.td_deque_ntasks) + 1);
Jonathan Peytonf4f96952016-05-31 19:07:00 +00003058
Jonathan Peyton30419822017-05-12 18:01:32 +00003059 result = true;
3060 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
3061 taskdata, tid));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003062
3063release_and_exit:
Jonathan Peyton30419822017-05-12 18:01:32 +00003064 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003065
Jonathan Peyton30419822017-05-12 18:01:32 +00003066 return result;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003067}
3068
/* The finish of the proxy tasks is divided in two pieces:
   - the top half is the one that can be done from a thread outside the team
   - the bottom half must be run from a thread within the team

   In order to run the bottom half the task gets queued back into one of the
   threads of the team. Once the td_incomplete_child_task counter of the parent
   is decremented the threads can leave the barriers. So, the bottom half needs
   to be queued before the counter is decremented. The top half is therefore
   divided in two parts:
   - things that can be run before queuing the bottom half
   - things that must be run after queuing the bottom half

   This creates a second race as the bottom half can free the task before the
   second top half is executed. To avoid this we use the
   td_incomplete_child_task of the proxy task to synchronize the top and bottom
   half. */
// __kmp_first_top_half_finish_proxy: first part of the top half of proxy-task
// completion; may run on a thread outside the team.  Marks the task complete,
// decrements its taskgroup counter, and pins the task with an imaginary child
// so the bottom half cannot free it before the second top half has run.
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1; // mark the task as completed

  if (taskdata->td_taskgroup)
    KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));

  // Create an imaginary child for this task so the bottom half cannot
  // release the task before we have completed the second top half
  TCI_4(taskdata->td_incomplete_child_tasks);
}
3100
Jonathan Peyton30419822017-05-12 18:01:32 +00003101static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3102 kmp_int32 children = 0;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003103
Jonathan Peyton30419822017-05-12 18:01:32 +00003104 // Predecrement simulated by "- 1" calculation
3105 children =
3106 KMP_TEST_THEN_DEC32(
3107 (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) -
3108 1;
3109 KMP_DEBUG_ASSERT(children >= 0);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003110
Jonathan Peyton30419822017-05-12 18:01:32 +00003111 // Remove the imaginary children
3112 TCD_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003113}
3114
// __kmp_bottom_half_finish_proxy: bottom half of proxy-task completion; must
// run on a thread that belongs to the team.  Waits until the second top half
// has dropped the imaginary child, then releases dependences and frees the
// task (and any ancestors it was keeping alive).
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
                   1); // top half must run before bottom half

  // We need to wait to make sure the top half is finished
  // Spinning here should be ok as this should happen quickly
  while (TCR_4(taskdata->td_incomplete_child_tasks) > 0)
    ;

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
3131
3132/*!
3133@ingroup TASKING
3134@param gtid Global Thread ID of encountering thread
3135@param ptask Task which execution is completed
3136
Jonathan Peyton30419822017-05-12 18:01:32 +00003137Execute the completation of a proxy task from a thread of that is part of the
3138team. Run first and bottom halves directly.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003139*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003140void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
3141 KMP_DEBUG_ASSERT(ptask != NULL);
3142 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3143 KA_TRACE(
3144 10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
3145 gtid, taskdata));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003146
Jonathan Peyton30419822017-05-12 18:01:32 +00003147 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003148
Jonathan Peyton30419822017-05-12 18:01:32 +00003149 __kmp_first_top_half_finish_proxy(taskdata);
3150 __kmp_second_top_half_finish_proxy(taskdata);
3151 __kmp_bottom_half_finish_proxy(gtid, ptask);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003152
Jonathan Peyton30419822017-05-12 18:01:32 +00003153 KA_TRACE(10,
3154 ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
3155 gtid, taskdata));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003156}
3157
/*!
@ingroup TASKING
@param ptask Task which execution is completed

Execute the completion of a proxy task from a thread that could not belong to
the team. The bottom half is handed off to a thread of the owning team.
*/
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);

  // Enqueue task to complete bottom half completion from a thread within the
  // corresponding team
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;

  // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
  // but we cannot use __kmp_get_random here
  kmp_int32 start_k = 0;
  // pass doubles after each full sweep, progressively allowing fuller deques
  // to be grown (see the pass check in __kmp_give_task).
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // For now we're just linearly trying to find a thread
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;

    // we did a full pass through all the threads
    if (k == start_k)
      pass = pass << 1;

    // NOTE(review): `thread` is t_threads[previous k] while the deque index
    // passed below is the already-advanced k; `thread` is only used as the
    // allocation context inside __kmp_give_task, but confirm this offset is
    // intentional.
  } while (!__kmp_give_task(thread, k, ptask, pass));

  __kmp_second_top_half_finish_proxy(taskdata);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
3208
// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
// for taskloop
//
// thread: allocating thread
// task_src: pointer to source task to be duplicated
// returns: a pointer to the allocated kmp_task_t structure (task).
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
  taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
                   TASK_FULL); // it should not be proxy task
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  task_size = taskdata_src->td_size_alloc;

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif /* USE_FAST_MEMORY */
  // Bulk-copy the whole allocation, then patch the fields that must differ.
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Initialize new task (only specific fields not affected by memcpy)
  taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need setup shareds pointer
    // Shareds live inside the same allocation: rebase the pointer into the
    // new block at the same offset it had in the source block.
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Shareds must stay pointer-aligned after rebasing.
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_taskgroup =
      parent_task
          ->td_taskgroup; // task inherits the taskgroup from the parent task

  // Only need to keep track of child task counts if team parallel and tasking
  // not serialized
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks));
    if (parent_task->td_taskgroup)
      KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
    // Only need to keep track of allocated child tasks for explicit tasks since
    // implicit not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
      KMP_TEST_THEN_INC32(
          (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks));
  }

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid,
                       (void *)task->routine);
#endif
  return task;
}
3278
// Routine optionally generated by the compiler for setting the lastprivate flag
// and calling needed constructors for private/firstprivate objects
// (used to form taskloop tasks from pattern task)
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
Jonathan Peyton283a2152016-03-02 22:47:51 +00003283
// __kmp_taskloop_linear: Start tasks of the taskloop linearly
//
// loc Source location information
// gtid Global thread ID
// task Task with whole loop iteration range (the "pattern" task)
// lb Pointer to loop lower bound
// ub Pointer to loop upper bound
// st Loop stride
// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
// grainsize Schedule value if specified
// task_dup Tasks duplication routine
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int sched, kmp_uint64 grainsize, void *task_dup) {
  KMP_COUNT_BLOCK(OMP_TASKLOOP);
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 tc; // trip count of the whole loop
  kmp_uint64 lower = *lb; // compiler provides global bounds here
  kmp_uint64 upper = *ub;
  kmp_uint64 i, num_tasks = 0, extras = 0;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;
  size_t lower_offset =
      (char *)lb - (char *)task; // remember offset of lb in the task structure
  size_t upper_offset =
      (char *)ub - (char *)task; // remember offset of ub in the task structure

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish(gtid, task, current_task);
    return;
  }

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, we can choose the default
    // let's try to schedule (team_size*10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    // intentional fallthrough: the computed value is treated as a num_tasks
    // request, exactly as if the user had supplied num_tasks(team_size*10)
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1; // too big grainsize requested, adjust values
      grainsize = tc;
      extras = 0;
    } else {
      num_tasks = tc / grainsize;
      grainsize =
          tc /
          num_tasks; // adjust grainsize for balanced distribution of iterations
      extras = tc % num_tasks;
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }
  // Invariant: the chunks cover the trip count exactly, with `extras` chunks
  // one iteration larger than `grainsize`.
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize "
                "%lld, extras %lld\n",
                gtid, num_tasks, grainsize, extras));

  // Main loop, launch num_tasks tasks, assign grainsize iterations each task
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // first extras iterations get bigger chunk (grainsize+1)
    }
    upper = lower + st * chunk_minus_1;
    if (i == num_tasks - 1) {
      // schedule the last task, set lastprivate flag
      lastpriv = 1;
#if KMP_DEBUG
      // The last chunk must end exactly at (or just beyond, per stride) the
      // global upper bound.
      if (st == 1)
        KMP_DEBUG_ASSERT(upper == *ub);
      else if (st > 0)
        KMP_DEBUG_ASSERT(upper + st > *ub);
      else
        KMP_DEBUG_ASSERT(upper + st < *ub);
#endif
    }
    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
    *(kmp_uint64 *)((char *)next_task + lower_offset) =
        lower; // adjust task-specific bounds
    *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
    if (ptask_dup != NULL)
      ptask_dup(next_task, task,
                lastpriv); // set lastprivate flag, construct fistprivates, etc.
    KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper "
                  "%lld (offsets %p %p)\n",
                  gtid, next_task, lower, upper, lower_offset, upper_offset));
    __kmp_omp_task(gtid, next_task, true); // schedule new task
    lower = upper + st; // adjust lower bound for the next iteration
  }
  // free the pattern task and exit
  __kmp_task_start(gtid, task, current_task);
  // do not execute the pattern task, just do bookkeeping
  __kmp_task_finish(gtid, task, current_task);
}
3410
3411/*!
3412@ingroup TASKING
3413@param loc Source location information
3414@param gtid Global thread ID
3415@param task Task structure
3416@param if_val Value of the if clause
3417@param lb Pointer to loop lower bound
3418@param ub Pointer to loop upper bound
3419@param st Loop stride
3420@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3421@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3422@param grainsize Schedule value if specified
3423@param task_dup Tasks duplication routine
3424
3425Execute the taskloop construct.
3426*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003427void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3428 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
3429 int sched, kmp_uint64 grainsize, void *task_dup) {
3430 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3431 KMP_DEBUG_ASSERT(task != NULL);
Jonathan Peyton283a2152016-03-02 22:47:51 +00003432
Jonathan Peyton30419822017-05-12 18:01:32 +00003433 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub "
3434 "%lld st %lld, grain %llu(%d)\n",
3435 gtid, taskdata, *lb, *ub, st, grainsize, sched));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003436
Jonathan Peyton30419822017-05-12 18:01:32 +00003437 // check if clause value first
3438 if (if_val == 0) { // if(0) specified, mark task as serial
3439 taskdata->td_flags.task_serial = 1;
3440 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3441 }
3442 if (nogroup == 0) {
3443 __kmpc_taskgroup(loc, gtid);
3444 }
Jonathan Peyton283a2152016-03-02 22:47:51 +00003445
Jonathan Peyton30419822017-05-12 18:01:32 +00003446 if (1 /* AC: use some heuristic here to choose task scheduling method */) {
3447 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, sched, grainsize,
3448 task_dup);
3449 }
Jonathan Peyton283a2152016-03-02 22:47:51 +00003450
Jonathan Peyton30419822017-05-12 18:01:32 +00003451 if (nogroup == 0) {
3452 __kmpc_end_taskgroup(loc, gtid);
3453 }
3454 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003455}
3456
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003457#endif