blob: 8f1eb02499c3a018b996e50a64bcf9f48cb6934b [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
Jonathan Peyton45be4502015-08-11 21:36:41 +000019#include "kmp_stats.h"
Jonathan Peyton30419822017-05-12 18:01:32 +000020#include "kmp_wait_release.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000021
Andrey Churbanove5f44922015-04-29 16:22:07 +000022#if OMPT_SUPPORT
23#include "ompt-specific.h"
24#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +000025
Jonas Hahnfeld50fed042016-11-07 15:58:36 +000026#include "tsan_annotations.h"
27
Jim Cownie5e8470a2013-09-27 10:38:44 +000028/* forward declaration */
Jonathan Peyton30419822017-05-12 18:01:32 +000029static void __kmp_enable_tasking(kmp_task_team_t *task_team,
30 kmp_info_t *this_thr);
31static void __kmp_alloc_task_deque(kmp_info_t *thread,
32 kmp_thread_data_t *thread_data);
33static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
34 kmp_task_team_t *task_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +000035
Jonathan Peytondf6818b2016-06-14 17:57:47 +000036#ifdef OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +000037static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +000038#endif
39
Jim Cownie5e8470a2013-09-27 10:38:44 +000040#ifdef BUILD_TIED_TASK_STACK
41
Jim Cownie5e8470a2013-09-27 10:38:44 +000042// __kmp_trace_task_stack: print the tied tasks from the task stack in order
Jonathan Peyton30419822017-05-12 18:01:32 +000043// from top do bottom
Jim Cownie5e8470a2013-09-27 10:38:44 +000044//
45// gtid: global thread identifier for thread containing stack
46// thread_data: thread data for task team thread containing stack
47// threshold: value above which the trace statement triggers
48// location: string identifying call site of this function (for trace)
Jonathan Peyton30419822017-05-12 18:01:32 +000049static void __kmp_trace_task_stack(kmp_int32 gtid,
50 kmp_thread_data_t *thread_data,
51 int threshold, char *location) {
52 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
53 kmp_taskdata_t **stack_top = task_stack->ts_top;
54 kmp_int32 entries = task_stack->ts_entries;
55 kmp_taskdata_t *tied_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +000056
Jonathan Peyton30419822017-05-12 18:01:32 +000057 KA_TRACE(
58 threshold,
59 ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
60 "first_block = %p, stack_top = %p \n",
61 location, gtid, entries, task_stack->ts_first_block, stack_top));
Jim Cownie5e8470a2013-09-27 10:38:44 +000062
Jonathan Peyton30419822017-05-12 18:01:32 +000063 KMP_DEBUG_ASSERT(stack_top != NULL);
64 KMP_DEBUG_ASSERT(entries > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +000065
Jonathan Peyton30419822017-05-12 18:01:32 +000066 while (entries != 0) {
67 KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
68 // fix up ts_top if we need to pop from previous block
69 if (entries & TASK_STACK_INDEX_MASK == 0) {
70 kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
Jim Cownie5e8470a2013-09-27 10:38:44 +000071
Jonathan Peyton30419822017-05-12 18:01:32 +000072 stack_block = stack_block->sb_prev;
73 stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
Jim Cownie5e8470a2013-09-27 10:38:44 +000074 }
Jim Cownie5e8470a2013-09-27 10:38:44 +000075
Jonathan Peyton30419822017-05-12 18:01:32 +000076 // finish bookkeeping
77 stack_top--;
78 entries--;
79
80 tied_task = *stack_top;
81
82 KMP_DEBUG_ASSERT(tied_task != NULL);
83 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
84
85 KA_TRACE(threshold,
86 ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
87 "stack_top=%p, tied_task=%p\n",
88 location, gtid, entries, stack_top, tied_task));
89 }
90 KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);
91
92 KA_TRACE(threshold,
93 ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
94 location, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +000095}
96
Jim Cownie5e8470a2013-09-27 10:38:44 +000097// __kmp_init_task_stack: initialize the task stack for the first time
Jonathan Peyton30419822017-05-12 18:01:32 +000098// after a thread_data structure is created.
99// It should not be necessary to do this again (assuming the stack works).
Jim Cownie5e8470a2013-09-27 10:38:44 +0000100//
101// gtid: global thread identifier of calling thread
102// thread_data: thread data for task team thread containing stack
Jonathan Peyton30419822017-05-12 18:01:32 +0000103static void __kmp_init_task_stack(kmp_int32 gtid,
104 kmp_thread_data_t *thread_data) {
105 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
106 kmp_stack_block_t *first_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000107
Jonathan Peyton30419822017-05-12 18:01:32 +0000108 // set up the first block of the stack
109 first_block = &task_stack->ts_first_block;
110 task_stack->ts_top = (kmp_taskdata_t **)first_block;
111 memset((void *)first_block, '\0',
112 TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000113
Jonathan Peyton30419822017-05-12 18:01:32 +0000114 // initialize the stack to be empty
115 task_stack->ts_entries = TASK_STACK_EMPTY;
116 first_block->sb_next = NULL;
117 first_block->sb_prev = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000118}
119
Jim Cownie5e8470a2013-09-27 10:38:44 +0000120// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
121//
122// gtid: global thread identifier for calling thread
123// thread_data: thread info for thread containing stack
Jonathan Peyton30419822017-05-12 18:01:32 +0000124static void __kmp_free_task_stack(kmp_int32 gtid,
125 kmp_thread_data_t *thread_data) {
126 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
127 kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000128
Jonathan Peyton30419822017-05-12 18:01:32 +0000129 KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
130 // free from the second block of the stack
131 while (stack_block != NULL) {
132 kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000133
Jonathan Peyton30419822017-05-12 18:01:32 +0000134 stack_block->sb_next = NULL;
135 stack_block->sb_prev = NULL;
136 if (stack_block != &task_stack->ts_first_block) {
137 __kmp_thread_free(thread,
138 stack_block); // free the block, if not the first
Jim Cownie5e8470a2013-09-27 10:38:44 +0000139 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000140 stack_block = next_block;
141 }
142 // initialize the stack to be empty
143 task_stack->ts_entries = 0;
144 task_stack->ts_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000145}
146
Jim Cownie5e8470a2013-09-27 10:38:44 +0000147// __kmp_push_task_stack: Push the tied task onto the task stack.
148// Grow the stack if necessary by allocating another block.
149//
150// gtid: global thread identifier for calling thread
151// thread: thread info for thread containing stack
152// tied_task: the task to push on the stack
Jonathan Peyton30419822017-05-12 18:01:32 +0000153static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
154 kmp_taskdata_t *tied_task) {
155 // GEH - need to consider what to do if tt_threads_data not allocated yet
156 kmp_thread_data_t *thread_data =
157 &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
158 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000159
Jonathan Peyton30419822017-05-12 18:01:32 +0000160 if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
161 return; // Don't push anything on stack if team or team tasks are serialized
162 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000163
Jonathan Peyton30419822017-05-12 18:01:32 +0000164 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
165 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
166
167 KA_TRACE(20,
168 ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
169 gtid, thread, tied_task));
170 // Store entry
171 *(task_stack->ts_top) = tied_task;
172
173 // Do bookkeeping for next push
174 task_stack->ts_top++;
175 task_stack->ts_entries++;
176
177 if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
178 // Find beginning of this task block
179 kmp_stack_block_t *stack_block =
180 (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
181
182 // Check if we already have a block
183 if (stack_block->sb_next !=
184 NULL) { // reset ts_top to beginning of next block
185 task_stack->ts_top = &stack_block->sb_next->sb_block[0];
186 } else { // Alloc new block and link it up
187 kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
188 thread, sizeof(kmp_stack_block_t));
189
190 task_stack->ts_top = &new_block->sb_block[0];
191 stack_block->sb_next = new_block;
192 new_block->sb_prev = stack_block;
193 new_block->sb_next = NULL;
194
195 KA_TRACE(
196 30,
197 ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
198 gtid, tied_task, new_block));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000199 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000200 }
201 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
202 tied_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000203}
204
Jim Cownie5e8470a2013-09-27 10:38:44 +0000205// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
Jonathan Peyton30419822017-05-12 18:01:32 +0000206// the task, just check to make sure it matches the ending task passed in.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000207//
208// gtid: global thread identifier for the calling thread
209// thread: thread info structure containing stack
210// tied_task: the task popped off the stack
211// ending_task: the task that is ending (should match popped task)
Jonathan Peyton30419822017-05-12 18:01:32 +0000212static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
213 kmp_taskdata_t *ending_task) {
214 // GEH - need to consider what to do if tt_threads_data not allocated yet
215 kmp_thread_data_t *thread_data =
216 &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)];
217 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
218 kmp_taskdata_t *tied_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000219
Jonathan Peyton30419822017-05-12 18:01:32 +0000220 if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
221 // Don't pop anything from stack if team or team tasks are serialized
Jim Cownie5e8470a2013-09-27 10:38:44 +0000222 return;
Jonathan Peyton30419822017-05-12 18:01:32 +0000223 }
224
225 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
226 KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);
227
228 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
229 thread));
230
231 // fix up ts_top if we need to pop from previous block
232 if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
233 kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
234
235 stack_block = stack_block->sb_prev;
236 task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
237 }
238
239 // finish bookkeeping
240 task_stack->ts_top--;
241 task_stack->ts_entries--;
242
243 tied_task = *(task_stack->ts_top);
244
245 KMP_DEBUG_ASSERT(tied_task != NULL);
246 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
247 KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly
248
249 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
250 tied_task));
251 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000252}
253#endif /* BUILD_TIED_TASK_STACK */
254
// __kmp_push_task: Add a task to the thread's deque
//
// gtid: global thread id of the encountering thread
// task: task thunk to enqueue on this thread's own deque
//
// Returns TASK_SUCCESSFULLY_PUSHED on success, or TASK_NOT_PUSHED when the
// caller must execute the task immediately instead (serialized team, or the
// deque is full).
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (taskdata->td_flags.task_serial) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (!KMP_TASKING_ENABLED(task_team)) {
    // Lazily switch the task team into tasking mode on first real push.
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (thread_data->td.td_deque == NULL) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  // Check if deque is full (unlocked fast-path check; rechecked under the
  // lock below when proxy tasks may race with us)
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Lock the deque for the task push operation
  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

#if OMP_45_ENABLED
  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }
#else
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
#endif

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count

  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
346
Jonathan Peyton30419822017-05-12 18:01:32 +0000347// __kmp_pop_current_task_from_thread: set up current task from called thread
348// when team ends
349//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000350// this_thr: thread structure to set current_task in.
Jonathan Peyton30419822017-05-12 18:01:32 +0000351void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
352 KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
353 "this_thread=%p, curtask=%p, "
354 "curtask_parent=%p\n",
355 0, this_thr, this_thr->th.th_current_task,
356 this_thr->th.th_current_task->td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000357
Jonathan Peyton30419822017-05-12 18:01:32 +0000358 this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000359
Jonathan Peyton30419822017-05-12 18:01:32 +0000360 KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
361 "this_thread=%p, curtask=%p, "
362 "curtask_parent=%p\n",
363 0, this_thr, this_thr->th.th_current_task,
364 this_thr->th.th_current_task->td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000365}
366
Jonathan Peyton30419822017-05-12 18:01:32 +0000367// __kmp_push_current_task_to_thread: set up current task in called thread for a
368// new team
369//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000370// this_thr: thread structure to set up
371// team: team for implicit task data
372// tid: thread within team to set up
Jonathan Peyton30419822017-05-12 18:01:32 +0000373void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
374 int tid) {
375 // current task of the thread is a parent of the new just created implicit
376 // tasks of new team
377 KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
378 "curtask=%p "
379 "parent_task=%p\n",
380 tid, this_thr, this_thr->th.th_current_task,
381 team->t.t_implicit_task_taskdata[tid].td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000382
Jonathan Peyton30419822017-05-12 18:01:32 +0000383 KMP_DEBUG_ASSERT(this_thr != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000384
Jonathan Peyton30419822017-05-12 18:01:32 +0000385 if (tid == 0) {
386 if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
387 team->t.t_implicit_task_taskdata[0].td_parent =
388 this_thr->th.th_current_task;
389 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000390 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000391 } else {
392 team->t.t_implicit_task_taskdata[tid].td_parent =
393 team->t.t_implicit_task_taskdata[0].td_parent;
394 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
395 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000396
Jonathan Peyton30419822017-05-12 18:01:32 +0000397 KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
398 "curtask=%p "
399 "parent_task=%p\n",
400 tid, this_thr, this_thr->th.th_current_task,
401 team->t.t_implicit_task_taskdata[tid].td_parent));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000402}
403
// __kmp_task_start: bookkeeping for a task starting execution
//
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  // TODO: GEH - make sure root team implicit task is initialized properly.
  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;

  // An untied task may be (re)started multiple times, so 'started' and
  // 'executing' may already be set in that case — the asserts allow it.
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  // GEH TODO: shouldn't we pass some sort of location identifier here?
  // APT: yes, we will pass location here.
  // need to store current thread state (in a thread or taskdata structure)
  // before setting work_state, otherwise wrong state is set after end of task

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));

#if OMPT_SUPPORT
  // Notify a registered tool that the task began, passing the parent task's
  // id and frame (or none if there is no parent).
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
    kmp_taskdata_t *parent = taskdata->td_parent;
    ompt_callbacks.ompt_callback(ompt_event_task_begin)(
        parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
        parent ? &(parent->ompt_task_info.frame) : NULL,
        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function);
  }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
  /* OMPT emit all dependences if requested by the tool */
  if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
      ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
    ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps,
        taskdata->ompt_task_info.ndeps);
    /* We can now free the allocated memory for the dependencies */
    KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps);
    taskdata->ompt_task_info.deps = NULL;
    taskdata->ompt_task_info.ndeps = 0;
  }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

  return;
}
476
Jonathan Peyton30419822017-05-12 18:01:32 +0000477// __kmpc_omp_task_begin_if0: report that a given serialized task has started
478// execution
479//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000480// loc_ref: source location information; points to beginning of task block.
481// gtid: global thread number.
482// task: task thunk for the started task.
Jonathan Peyton30419822017-05-12 18:01:32 +0000483void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
484 kmp_task_t *task) {
485 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
486 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000487
Jonathan Peyton30419822017-05-12 18:01:32 +0000488 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
489 "current_task=%p\n",
490 gtid, loc_ref, taskdata, current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000491
Jonathan Peyton30419822017-05-12 18:01:32 +0000492 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
493 // untied task needs to increment counter so that the task structure is not
494 // freed prematurely
495 kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
496 KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
497 "incremented for task %p\n",
498 gtid, counter, taskdata));
499 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000500
Jonathan Peyton30419822017-05-12 18:01:32 +0000501 taskdata->td_flags.task_serial =
502 1; // Execute this task immediately, not deferred.
503 __kmp_task_start(gtid, task, current_task);
Jonathan Peytone6643da2016-04-18 21:35:14 +0000504
Jonathan Peyton30419822017-05-12 18:01:32 +0000505 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
506 loc_ref, taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000507
Jonathan Peyton30419822017-05-12 18:01:32 +0000508 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000509}
510
511#ifdef TASK_UNUSED
Jim Cownie5e8470a2013-09-27 10:38:44 +0000512// __kmpc_omp_task_begin: report that a given task has started execution
513// NEVER GENERATED BY COMPILER, DEPRECATED!!!
Jonathan Peyton30419822017-05-12 18:01:32 +0000514void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
515 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000516
Jonathan Peyton30419822017-05-12 18:01:32 +0000517 KA_TRACE(
518 10,
519 ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
520 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000521
Jonathan Peyton30419822017-05-12 18:01:32 +0000522 __kmp_task_start(gtid, task, current_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000523
Jonathan Peyton30419822017-05-12 18:01:32 +0000524 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
525 loc_ref, KMP_TASK_TO_TASKDATA(task)));
526 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000527}
528#endif // TASK_UNUSED
529
// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values:
  // only a completed, not-yet-freed explicit task with no live child
  // bookkeeping may have its storage released.
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_allocated_child_tasks) == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_incomplete_child_tasks) == 0);

  taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);
// deallocate the taskdata and shared variable blocks associated with this task
// (shareds are allocated contiguously with the taskdata, so one free suffices)
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
560
// __kmp_free_task_and_ancestors: free the current task and ancestors without
// children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
#if OMP_45_ENABLED
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
#else
  kmp_int32 team_serial =
      taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
#endif
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // Atomically drop this task's allocated-children refcount; the "- 1"
  // converts the fetch-then-decrement result into the post-decrement value.
  kmp_int32 children =
      KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    // Stop checking ancestors at implicit task instead of walking up ancestor
    // tree to avoid premature deallocation of ancestors.
    if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT)
      return;

    // Predecrement simulated by "- 1" calculation
    children = KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
614
Jim Cownie5e8470a2013-09-27 10:38:44 +0000615// __kmp_task_finish: bookkeeping to do when a task finishes execution
Jonathan Peyton30419822017-05-12 18:01:32 +0000616//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000617// gtid: global thread ID for calling thread
618// task: task to be finished
619// resumed_task: task to be resumed. (may be NULL if task is serialized)
Jonathan Peyton30419822017-05-12 18:01:32 +0000620static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
621 kmp_taskdata_t *resumed_task) {
622 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
623 kmp_info_t *thread = __kmp_threads[gtid];
624 kmp_task_team_t *task_team =
625 thread->th.th_task_team; // might be NULL for serial teams...
626 kmp_int32 children = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000627
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000628#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +0000629 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) {
630 kmp_taskdata_t *parent = taskdata->td_parent;
631 ompt_callbacks.ompt_callback(ompt_event_task_end)(
632 taskdata->ompt_task_info.task_id);
633 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +0000634#endif
635
Jonathan Peyton30419822017-05-12 18:01:32 +0000636 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
637 "task %p\n",
638 gtid, taskdata, resumed_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000639
Jonathan Peyton30419822017-05-12 18:01:32 +0000640 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000641
Jonathan Peyton30419822017-05-12 18:01:32 +0000642// Pop task from stack if tied
Jim Cownie5e8470a2013-09-27 10:38:44 +0000643#ifdef BUILD_TIED_TASK_STACK
Jonathan Peyton30419822017-05-12 18:01:32 +0000644 if (taskdata->td_flags.tiedness == TASK_TIED) {
645 __kmp_pop_task_stack(gtid, thread, taskdata);
646 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000647#endif /* BUILD_TIED_TASK_STACK */
648
Jonathan Peyton30419822017-05-12 18:01:32 +0000649 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
650 // untied task needs to check the counter so that the task structure is not
651 // freed prematurely
652 kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
653 KA_TRACE(
654 20,
655 ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
656 gtid, counter, taskdata));
657 if (counter > 0) {
658 // untied task is not done, to be continued possibly by other thread, do
659 // not free it now
660 if (resumed_task == NULL) {
661 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
662 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
663 // task is the parent
664 }
665 thread->th.th_current_task = resumed_task; // restore current_task
666 resumed_task->td_flags.executing = 1; // resume previous task
667 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
668 "resuming task %p\n",
669 gtid, taskdata, resumed_task));
670 return;
Jonathan Peytone6643da2016-04-18 21:35:14 +0000671 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000672 }
Jonathan Peytone6643da2016-04-18 21:35:14 +0000673
Jonathan Peyton30419822017-05-12 18:01:32 +0000674 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
675 taskdata->td_flags.complete = 1; // mark the task as completed
676 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
677 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000678
Jonathan Peyton30419822017-05-12 18:01:32 +0000679 // Only need to keep track of count if team parallel and tasking not
680 // serialized
681 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
682 // Predecrement simulated by "- 1" calculation
683 children =
Andrey Churbanov5ba90c72017-07-17 09:03:14 +0000684 KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) -
Jonathan Peyton30419822017-05-12 18:01:32 +0000685 1;
686 KMP_DEBUG_ASSERT(children >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000687#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000688 if (taskdata->td_taskgroup)
689 KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000690#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000691 }
692 // if we found proxy tasks there could exist a dependency chain
693 // with the proxy task as origin
694 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
695 (task_team && task_team->tt.tt_found_proxy_tasks)) {
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000696#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000697 __kmp_release_deps(gtid, taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000698#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000699 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000700
Jonathan Peyton30419822017-05-12 18:01:32 +0000701 // td_flags.executing must be marked as 0 after __kmp_release_deps has been
702 // called. Othertwise, if a task is executed immediately from the release_deps
703 // code, the flag will be reset to 1 again by this same function
704 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
705 taskdata->td_flags.executing = 0; // suspend the finishing task
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000706
Jonathan Peyton30419822017-05-12 18:01:32 +0000707 KA_TRACE(
708 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
709 gtid, taskdata, children));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000710
Jim Cownie181b4bb2013-12-23 17:28:57 +0000711#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000712 /* If the tasks' destructor thunk flag has been set, we need to invoke the
713 destructor thunk that has been generated by the compiler. The code is
714 placed here, since at this point other tasks might have been released
715 hence overlapping the destructor invokations with some other work in the
716 released tasks. The OpenMP spec is not specific on when the destructors
717 are invoked, so we should be free to choose. */
718 if (taskdata->td_flags.destructors_thunk) {
719 kmp_routine_entry_t destr_thunk = task->data1.destructors;
720 KMP_ASSERT(destr_thunk);
721 destr_thunk(gtid, task);
722 }
Jim Cownie181b4bb2013-12-23 17:28:57 +0000723#endif // OMP_40_ENABLED
724
Jonathan Peyton30419822017-05-12 18:01:32 +0000725 // bookkeeping for resuming task:
726 // GEH - note tasking_ser => task_serial
727 KMP_DEBUG_ASSERT(
728 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
729 taskdata->td_flags.task_serial);
730 if (taskdata->td_flags.task_serial) {
731 if (resumed_task == NULL) {
732 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
733 // task is the parent
734 } else
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000735#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000736 if (!(task_team && task_team->tt.tt_found_proxy_tasks))
Jonas Hahnfeldbedc3712016-08-08 10:08:14 +0000737#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000738 {
739 // verify resumed task passed in points to parent
740 KMP_DEBUG_ASSERT(resumed_task == taskdata->td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000741 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000742 } else {
743 KMP_DEBUG_ASSERT(resumed_task !=
744 NULL); // verify that resumed task is passed as arguemnt
745 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000746
Jonathan Peyton30419822017-05-12 18:01:32 +0000747 // Free this task and then ancestor tasks if they have no children.
748 // Restore th_current_task first as suggested by John:
749 // johnmc: if an asynchronous inquiry peers into the runtime system
750 // it doesn't see the freed task as the current task.
751 thread->th.th_current_task = resumed_task;
752 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000753
Jonathan Peyton30419822017-05-12 18:01:32 +0000754 // TODO: GEH - make sure root team implicit task is initialized properly.
755 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
756 resumed_task->td_flags.executing = 1; // resume previous task
Jim Cownie5e8470a2013-09-27 10:38:44 +0000757
Jonathan Peyton30419822017-05-12 18:01:32 +0000758 KA_TRACE(
759 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
760 gtid, taskdata, resumed_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000761
Jonathan Peyton30419822017-05-12 18:01:32 +0000762 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000763}
764
Jim Cownie5e8470a2013-09-27 10:38:44 +0000765// __kmpc_omp_task_complete_if0: report that a task has completed execution
Jonathan Peyton30419822017-05-12 18:01:32 +0000766//
Jim Cownie5e8470a2013-09-27 10:38:44 +0000767// loc_ref: source location information; points to end of task block.
768// gtid: global thread number.
769// task: task thunk for the completed task.
Jonathan Peyton30419822017-05-12 18:01:32 +0000770void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
771 kmp_task_t *task) {
772 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
773 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
774 // this routine will provide task to resume
775 __kmp_task_finish(gtid, task, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000776
Jonathan Peyton30419822017-05-12 18:01:32 +0000777 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
778 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
779 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000780}
781
782#ifdef TASK_UNUSED
Jim Cownie5e8470a2013-09-27 10:38:44 +0000783// __kmpc_omp_task_complete: report that a task has completed execution
784// NEVER GENERATED BY COMPILER, DEPRECATED!!!
Jonathan Peyton30419822017-05-12 18:01:32 +0000785void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
786 kmp_task_t *task) {
787 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
788 loc_ref, KMP_TASK_TO_TASKDATA(task)));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000789
Jonathan Peyton30419822017-05-12 18:01:32 +0000790 __kmp_task_finish(gtid, task, NULL); // Not sure how to find task to resume
Jim Cownie5e8470a2013-09-27 10:38:44 +0000791
Jonathan Peyton30419822017-05-12 18:01:32 +0000792 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
793 loc_ref, KMP_TASK_TO_TASKDATA(task)));
794 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000795}
796#endif // TASK_UNUSED
797
Andrey Churbanove5f44922015-04-29 16:22:07 +0000798#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +0000799// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will
800// only be called after ompt_tool, so we already know whether ompt is enabled
801// or not.
802static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid,
803 void *function) {
804 if (ompt_enabled) {
805 task->ompt_task_info.task_id = __ompt_task_id_new(tid);
806 task->ompt_task_info.function = function;
807 task->ompt_task_info.frame.exit_runtime_frame = NULL;
808 task->ompt_task_info.frame.reenter_runtime_frame = NULL;
Jonas Hahnfeld39b68622016-01-28 10:39:52 +0000809#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000810 task->ompt_task_info.ndeps = 0;
811 task->ompt_task_info.deps = NULL;
Jonas Hahnfeld39b68622016-01-28 10:39:52 +0000812#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +0000813 }
Andrey Churbanove5f44922015-04-29 16:22:07 +0000814}
815#endif
816
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
// task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS. This is assumed to
// have already been done elsewhere.
// TODO: Get better loc_ref. Value passed in may be NULL
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  // The implicit task lives in per-team storage indexed by thread id.
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  // task->td_parent = NULL; // fix for CQ230101 (broken parent task info
  // in debugger)
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  // Implicit tasks are always tied, and never proxy tasks.
  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_45_ENABLED
  task->td_flags.proxy = TASK_FULL;
#endif

  // All implicit tasks are executed immediately, not deferred
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // An implicit task is considered already started and running.
  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;

#if OMP_40_ENABLED
  task->td_depnode = NULL;
#endif

  if (set_curr_task) { // only do this init first time thread is created
    task->td_incomplete_child_tasks = 0;
    // Not used: don't need to deallocate implicit task
    task->td_allocated_child_tasks = 0;
#if OMP_40_ENABLED
    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
    task->td_dephash = NULL;
#endif
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    // Re-initialization: child counters must already have drained to zero.
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

#if OMPT_SUPPORT
  // Implicit tasks have no user entry point, hence NULL function.
  __kmp_task_init_ompt(task, tid, NULL);
#endif

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
887
Jonathan Peyton30419822017-05-12 18:01:32 +0000888// __kmp_finish_implicit_task: Release resources associated to implicit tasks
889// at the end of parallel regions. Some resources are kept for reuse in the next
890// parallel region.
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000891//
Jonathan Peyton30419822017-05-12 18:01:32 +0000892// thread: thread data structure corresponding to implicit task
893void __kmp_finish_implicit_task(kmp_info_t *thread) {
894 kmp_taskdata_t *task = thread->th.th_current_task;
895 if (task->td_dephash)
896 __kmp_dephash_free_entries(thread, task->td_dephash);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000897}
898
Jonathan Peyton30419822017-05-12 18:01:32 +0000899// __kmp_free_implicit_task: Release resources associated to implicit tasks
900// when these are destroyed regions
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000901//
Jonathan Peyton30419822017-05-12 18:01:32 +0000902// thread: thread data structure corresponding to implicit task
903void __kmp_free_implicit_task(kmp_info_t *thread) {
904 kmp_taskdata_t *task = thread->th.th_current_task;
905 if (task->td_dephash)
906 __kmp_dephash_free(thread, task->td_dephash);
907 task->td_dephash = NULL;
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000908}
909
Jonathan Peyton30419822017-05-12 18:01:32 +0000910// Round up a size to a power of two specified by val: Used to insert padding
911// between structures co-allocated using a single malloc() call
912static size_t __kmp_round_up_to_val(size_t size, size_t val) {
913 if (size & (val - 1)) {
914 size &= ~(val - 1);
915 if (size <= KMP_SIZE_T_MAX - val) {
916 size += val; // Round up if there is no overflow.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000917 }; // if
Jonathan Peyton30419822017-05-12 18:01:32 +0000918 }; // if
919 return size;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000920} // __kmp_round_up_to_va
921
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new''
// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including
// private vars accessed in task.
// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed
// in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  // Children of a final task are themselves final (OpenMP final clause).
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
      // NOTE(review): intentionally empty branch in upstream source — no
      // special handling for merged if(0) tasks of a final parent.
    }
    flags->final = 1;
  }

#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY) {
    // Proxy (target) tasks are forced untied and merged-if0.
    flags->tiedness = TASK_UNTIED;
    flags->merged_if0 = 1;

    /* are we running in a sequential parallel or tskm_immediate_exec... we need
       tasking support enabled */
    if ((thread->th.th_task_team) == NULL) {
      /* This should only happen if the team is serialized
         setup a task team and propagate it to the thread */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(
          thread, team,
          1); // 1 indicates setup the current team regardless of nthreads
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    /* tasking must be enabled now as the task might not be pushed */
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only owner can allocate
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    // Mark the task team so barriers know proxy-task dependences may exist.
    if (task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
  }
#endif

  // Calculate shared structure offset including padding after kmp_task_t struct
  // to align pointers in shared struct
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

// Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */
  ANNOTATE_HAPPENS_AFTER(taskdata);

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0; // AC: Always start with 0 part id

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  taskdata->td_untied_count = 0;
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
#if OMP_45_ENABLED
  // avoid copying icvs for proxy tasks
  if (flags->proxy == TASK_FULL)
#endif
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags.tiedness = flags->tiedness;
  taskdata->td_flags.final = flags->final;
  taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
  taskdata->td_flags.proxy = flags->proxy;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
#endif
  taskdata->td_flags.tasktype = TASK_EXPLICIT;

  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  // GEH - TODO: fix this to copy parent task's value of team_serial flag
  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // GEH - Note we serialize the task if the team is serialized to make sure
  // implicit parallel region tasks are not left until program termination to
  // execute. Also, it helps locality to execute immediately.

  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;

  taskdata->td_flags.native = flags->native;

  taskdata->td_incomplete_child_tasks = 0;
  taskdata->td_allocated_child_tasks = 1; // start at one because counts current
// task and children
#if OMP_40_ENABLED
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
#endif

// Only need to keep track of child task counts if team parallel and tasking not
// serialized or if it is a proxy task
#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY ||
      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#else
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#endif
  {
    // Atomically register this child with the parent (and taskgroup, if any).
    KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks);
#if OMP_40_ENABLED
    if (parent_task->td_taskgroup)
      KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
#endif
    // Only need to keep track of allocated child tasks for explicit tasks since
    // implicit not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks);
    }
  }

  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));
  ANNOTATE_HAPPENS_BEFORE(task);

#if OMPT_SUPPORT
  __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry);
#endif

  return task;
}
1131
Jonathan Peyton30419822017-05-12 18:01:32 +00001132kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1133 kmp_int32 flags, size_t sizeof_kmp_task_t,
1134 size_t sizeof_shareds,
1135 kmp_routine_entry_t task_entry) {
1136 kmp_task_t *retval;
1137 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001138
Jonathan Peyton30419822017-05-12 18:01:32 +00001139 input_flags->native = FALSE;
1140// __kmp_task_alloc() sets up all other runtime flags
Jim Cownie5e8470a2013-09-27 10:38:44 +00001141
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001142#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001143 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1144 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1145 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1146 input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
1147 sizeof_shareds, task_entry));
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001148#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001149 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1150 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1151 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1152 sizeof_kmp_task_t, sizeof_shareds, task_entry));
Jonathan Peyton1c9e6432015-06-03 18:24:02 +00001153#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001154
Jonathan Peyton30419822017-05-12 18:01:32 +00001155 retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1156 sizeof_shareds, task_entry);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001157
Jonathan Peyton30419822017-05-12 18:01:32 +00001158 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001159
Jonathan Peyton30419822017-05-12 18:01:32 +00001160 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001161}
1162
// __kmp_invoke_task: invoke the specified task
//
// gtid: global thread ID of caller
// task: the task to invoke
// current_task: the task to resume after task invokation
//
// Handles three orthogonal concerns around the actual call of task->routine:
// proxy-task bottom halves (OMP 4.5), cancellation checks (OMP 4.0), and
// tool/profiling hooks (OMPT, ITT, internal stats). The brace structure is
// intertwined with the preprocessor: the `if (!discard) {` below opens under
// OMP_40_ENABLED and its matching `}` is emitted by a later
// `#if OMP_40_ENABLED` region, so the invocation itself is unconditional
// when OMP 4.0 support is compiled out.
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_uint64 cur_time; // only set/read when __kmp_forkjoin_frames_mode == 3
#if OMP_40_ENABLED
  int discard = 0 /* false */;
#endif
  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
#if OMP_45_ENABLED
  if (taskdata->td_flags.proxy == TASK_PROXY &&
      taskdata->td_flags.complete == 1) {
    // This is a proxy task that was already completed but it needs to run
    // its bottom-half finish; there is no user routine left to execute.
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));

    return;
  }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__kmp_forkjoin_frames_mode == 3) {
    // Get the current time stamp to measure task execution time to correct
    // barrier imbalance time
    cur_time = __itt_get_timestamp();
  }
#endif

#if OMP_45_ENABLED
  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
    ANNOTATE_HAPPENS_AFTER(task);
    __kmp_task_start(gtid, task, current_task);
#if OMP_45_ENABLED
  }
#endif

#if OMPT_SUPPORT
  ompt_thread_info_t oldInfo;
  kmp_info_t *thread;
  if (ompt_enabled) {
    // Store the threads states and restore them after the task
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = ompt_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_runtime_frame =
        __builtin_frame_address(0);
  }
#endif

#if OMP_40_ENABLED
  // TODO: cancel tasks if the parallel region has also been cancelled
  // TODO: check if this sequence can be hoisted above __kmp_task_start
  // if cancellation has been enabled for this run ...
  if (__kmp_omp_cancellation) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *this_team = this_thr->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
      KMP_COUNT_BLOCK(TASK_cancelled);
      // this task belongs to a task group and we need to cancel it
      discard = 1 /* true */;
    }
  }

  // Invoke the task routine and pass in relevant data.
  // Thunks generated by gcc take a different argument list.
  // NOTE: the matching `}` for this `if` appears in a later
  // `#if OMP_40_ENABLED` block, after the OMPT task_switch hooks.
  if (!discard) {
#if KMP_STATS_ENABLED
    KMP_COUNT_BLOCK(TASK_executed);
    // Attribute the task's runtime to the context that scheduled it.
    switch (KMP_GET_THREAD_STATE()) {
    case FORK_JOIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
      break;
    case PLAIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
      break;
    case TASKYIELD:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
      break;
    case TASKWAIT:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
      break;
    case TASKGROUP:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
      break;
    default:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
      break;
    }
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
    /* let OMPT know that we're about to run this task */
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
          current_task->ompt_task_info.task_id,
          taskdata->ompt_task_info.task_id);
    }
#endif

#ifdef KMP_GOMP_COMPAT
    if (taskdata->td_flags.native) {
      // GOMP (gcc) thunks take a single shareds pointer, not (gtid, task).
      ((void (*)(void *))(*(task->routine)))(task->shareds);
    } else
#endif /* KMP_GOMP_COMPAT */
    {
      (*(task->routine))(gtid, task);
    }
    KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
    /* let OMPT know that we're returning to the callee task */
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
          taskdata->ompt_task_info.task_id,
          current_task->ompt_task_info.task_id);
    }
#endif

#if OMP_40_ENABLED
  }
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT
  if (ompt_enabled) {
    // Restore the thread state saved before running the task.
    thread->th.ompt_thread_info = oldInfo;
    taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
  }
#endif

#if OMP_45_ENABLED
  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
    ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
    __kmp_task_finish(gtid, task, current_task);
#if OMP_45_ENABLED
  }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - correct arrive time after the task finished
  if (__kmp_forkjoin_frames_mode == 3) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    if (this_thr->th.th_bar_arrive_time) {
      this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
  }
#endif
  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
  return;
}
1339
Jim Cownie5e8470a2013-09-27 10:38:44 +00001340// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1341//
1342// loc_ref: location of original task pragma (ignored)
1343// gtid: Global Thread ID of encountering thread
1344// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1345// Returns:
Jonathan Peyton30419822017-05-12 18:01:32 +00001346// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1347// be resumed later.
1348// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1349// resumed later.
1350kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
1351 kmp_task_t *new_task) {
1352 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001353
Jonathan Peyton30419822017-05-12 18:01:32 +00001354 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1355 loc_ref, new_taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001356
Jonathan Peyton30419822017-05-12 18:01:32 +00001357 /* Should we execute the new task or queue it? For now, let's just always try
1358 to queue it. If the queue fills up, then we'll execute it. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001359
Jonathan Peyton30419822017-05-12 18:01:32 +00001360 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
1361 { // Execute this task immediately
1362 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1363 new_taskdata->td_flags.task_serial = 1;
1364 __kmp_invoke_task(gtid, new_task, current_task);
1365 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001366
Jonathan Peyton30419822017-05-12 18:01:32 +00001367 KA_TRACE(
1368 10,
1369 ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1370 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1371 gtid, loc_ref, new_taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001372
Jonathan Peyton30419822017-05-12 18:01:32 +00001373 ANNOTATE_HAPPENS_BEFORE(new_task);
1374 return TASK_CURRENT_NOT_QUEUED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001375}
1376
// __kmp_omp_task: Schedule a non-thread-switchable task for execution
//
// gtid: Global Thread ID of encountering thread
// new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
// serialize_immediate: if TRUE then if the task is executed immediately its
// execution will be serialized
// Returns:
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
//    be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
//    resumed later.
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPT_SUPPORT
  if (ompt_enabled) {
    // Record the caller's frame so tools can unwind past the runtime.
    new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
        __builtin_frame_address(1);
  }
#endif

/* Should we execute the new task or queue it? For now, let's just always try to
   queue it. If the queue fills up, then we'll execute it. */
#if OMP_45_ENABLED
  // Proxy tasks are never pushed to the deque; they always take the
  // immediate-execution path below.
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#else
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#endif
  { // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

#if OMPT_SUPPORT
  if (ompt_enabled) {
    new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
  }
#endif

  ANNOTATE_HAPPENS_BEFORE(new_task);
  return TASK_CURRENT_NOT_QUEUED;
}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001423
Jonathan Peyton30419822017-05-12 18:01:32 +00001424// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
1425// non-thread-switchable task from the parent thread only!
1426//
Jim Cownie5e8470a2013-09-27 10:38:44 +00001427// loc_ref: location of original task pragma (ignored)
1428// gtid: Global Thread ID of encountering thread
Jonathan Peyton30419822017-05-12 18:01:32 +00001429// new_task: non-thread-switchable task thunk allocated by
1430// __kmp_omp_task_alloc()
1431// Returns:
1432// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1433// be resumed later.
1434// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1435// resumed later.
1436kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
1437 kmp_task_t *new_task) {
1438 kmp_int32 res;
1439 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001440
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001441#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001442 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
Jonathan Peytond2eb3c72015-08-26 20:02:21 +00001443#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001444 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1445 new_taskdata));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001446
Jonathan Peyton30419822017-05-12 18:01:32 +00001447 res = __kmp_omp_task(gtid, new_task, true);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001448
Jonathan Peyton30419822017-05-12 18:01:32 +00001449 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
1450 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1451 gtid, loc_ref, new_taskdata));
1452 return res;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001453}
1454
// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
// complete
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));

  // NOTE(review): when tasking mode is tskm_immediate_exec, `taskdata` stays
  // uninitialized and is passed to the exit KA_TRACE below — harmless in
  // release builds where KA_TRACE compiles away, but worth confirming for
  // debug builds.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;
#if OMPT_SUPPORT && OMPT_TRACE
    // These locals are captured here and reused by the taskwait_end callback
    // at the bottom of this block.
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

    if (ompt_enabled) {
      kmp_team_t *team = thread->th.th_team;
      my_task_id = taskdata->ompt_task_info.task_id;
      my_parallel_id = team->t.ompt_team_info.parallel_id;

      taskdata->ompt_task_info.frame.reenter_runtime_frame =
          __builtin_frame_address(1);
      if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id,
                                                                my_task_id);
      }
    }
#endif

// Debugger: The taskwait is active. Store location and thread encountered the
// taskwait.
#if USE_ITT_BUILD
// Note: These values are used by ITT events as well.
#endif /* USE_ITT_BUILD */
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

    // Serialized teams and final tasks have no deferred children to wait on.
    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;

#if OMP_45_ENABLED
    // Proxy tasks may complete asynchronously, so wait even in cases that
    // would otherwise be exempt.
    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
#endif
    if (must_wait) {
      // Spin on the child-task counter, executing other tasks while waiting.
      kmp_flag_32 flag(
          RCAST(volatile kmp_uint32 *, &taskdata->td_incomplete_child_tasks),
          0U);
      while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
#if USE_ITT_BUILD
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

    // Debugger: The taskwait is completed. Location remains, but thread is
    // negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled) {
      if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
        ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id,
                                                              my_task_id);
      }
      taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif
    ANNOTATE_HAPPENS_AFTER(taskdata);
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
1545
// __kmpc_omp_taskyield: switch to a different task
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_COUNT_BLOCK(OMP_TASKYIELD);
  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));

  // NOTE(review): if the condition below is false, `taskdata` is passed
  // uninitialized to the exit KA_TRACE — benign when tracing is compiled
  // out, but worth confirming for debug builds.
  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;
// Should we model this as a task wait or not?
// Debugger: The taskwait is active. Store location and thread encountered the
// taskwait.
#if USE_ITT_BUILD
// Note: These values are used by ITT events as well.
#endif /* USE_ITT_BUILD */
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
    // Yield by executing at most a batch of other available tasks; unlike
    // taskwait, there is no loop waiting for children to complete.
    if (!taskdata->td_flags.team_serial) {
      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
        if (KMP_TASKING_ENABLED(task_team)) {
          __kmp_execute_tasks_32(
              thread, gtid, NULL, FALSE,
              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
              __kmp_task_stealing_constraint);
        }
      }
    }
#if USE_ITT_BUILD
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

    // Debugger: The taskwait is completed. Location remains, but thread is
    // negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
1603
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001604// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
1605#if OMP_45_ENABLED
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001606// Task Reduction implementation
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001607
// Per-item flags passed through from the compiler for task reduction.
typedef struct kmp_task_red_flags {
  unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects)
  unsigned reserved31 : 31; // unused; pads the bitfield to 32 bits
} kmp_task_red_flags_t;

// internal structure for reduction data item related info
// (runtime-side mirror of kmp_task_red_input_t plus the private storage
// pointers filled in by __kmpc_task_reduction_init)
typedef struct kmp_task_red_data {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item (rounded up to cache lines)
  void *reduce_priv; // thread specific data
  void *reduce_pend; // end of private data for comparison op
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_data_t;

// structure sent us by compiler - one per reduction item
typedef struct kmp_task_red_input {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_input_t;
1634
1635/*!
1636@ingroup TASKING
1637@param gtid Global thread ID
1638@param num Number of data items to reduce
1639@param data Array of data for reduction
1640@return The taskgroup identifier
1641
1642Initialize task reduction for the taskgroup.
1643*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001644void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
1645 kmp_info_t *thread = __kmp_threads[gtid];
1646 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
1647 kmp_int32 nth = thread->th.th_team_nproc;
1648 kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
1649 kmp_task_red_data_t *arr;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001650
Jonathan Peyton30419822017-05-12 18:01:32 +00001651 // check input data just in case
1652 KMP_ASSERT(tg != NULL);
1653 KMP_ASSERT(data != NULL);
1654 KMP_ASSERT(num > 0);
1655 if (nth == 1) {
1656 KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
1657 gtid, tg));
1658 return (void *)tg;
1659 }
1660 KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
1661 gtid, tg, num));
1662 arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
1663 thread, num * sizeof(kmp_task_red_data_t));
1664 for (int i = 0; i < num; ++i) {
1665 void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init);
1666 size_t size = input[i].reduce_size - 1;
1667 // round the size up to cache line per thread-specific item
1668 size += CACHE_LINE - size % CACHE_LINE;
1669 KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
1670 arr[i].reduce_shar = input[i].reduce_shar;
1671 arr[i].reduce_size = size;
1672 arr[i].reduce_init = input[i].reduce_init;
1673 arr[i].reduce_fini = input[i].reduce_fini;
1674 arr[i].reduce_comb = input[i].reduce_comb;
1675 arr[i].flags = input[i].flags;
1676 if (!input[i].flags.lazy_priv) {
1677 // allocate cache-line aligned block and fill it with zeros
1678 arr[i].reduce_priv = __kmp_allocate(nth * size);
1679 arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
1680 if (f_init != NULL) {
1681 // initialize thread-specific items
1682 for (int j = 0; j < nth; ++j) {
1683 f_init((char *)(arr[i].reduce_priv) + j * size);
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001684 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001685 }
1686 } else {
1687 // only allocate space for pointers now,
1688 // objects will be lazily allocated/initialized once requested
1689 arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001690 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001691 }
1692 tg->reduce_data = (void *)arr;
1693 tg->reduce_num_data = num;
1694 return (void *)tg;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001695}
1696
1697/*!
1698@ingroup TASKING
1699@param gtid Global thread ID
1700@param tskgrp The taskgroup ID (optional)
1701@param data Shared location of the item
1702@return The pointer to per-thread data
1703
1704Get thread-specific location of data item
1705*/
Jonathan Peyton30419822017-05-12 18:01:32 +00001706void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
1707 kmp_info_t *thread = __kmp_threads[gtid];
1708 kmp_int32 nth = thread->th.th_team_nproc;
1709 if (nth == 1)
1710 return data; // nothing to do
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001711
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
1713 if (tg == NULL)
1714 tg = thread->th.th_current_task->td_taskgroup;
1715 KMP_ASSERT(tg != NULL);
1716 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
1717 kmp_int32 num = tg->reduce_num_data;
1718 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001719
Jonathan Peyton30419822017-05-12 18:01:32 +00001720 KMP_ASSERT(data != NULL);
1721 while (tg != NULL) {
1722 for (int i = 0; i < num; ++i) {
1723 if (!arr[i].flags.lazy_priv) {
1724 if (data == arr[i].reduce_shar ||
1725 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
1726 return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
1727 } else {
1728 // check shared location first
1729 void **p_priv = (void **)(arr[i].reduce_priv);
1730 if (data == arr[i].reduce_shar)
1731 goto found;
1732 // check if we get some thread specific location as parameter
1733 for (int j = 0; j < nth; ++j)
1734 if (data == p_priv[j])
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001735 goto found;
Jonathan Peyton30419822017-05-12 18:01:32 +00001736 continue; // not found, continue search
1737 found:
1738 if (p_priv[tid] == NULL) {
1739 // allocate thread specific object lazily
1740 void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init);
1741 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
1742 if (f_init != NULL) {
1743 f_init(p_priv[tid]);
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001744 }
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001745 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001746 return p_priv[tid];
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001747 }
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001748 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001749 tg = tg->parent;
1750 arr = (kmp_task_red_data_t *)(tg->reduce_data);
1751 num = tg->reduce_num_data;
1752 }
1753 KMP_ASSERT2(0, "Unknown task reduction item");
1754 return NULL; // ERROR, this line never executed
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001755}
1756
1757// Finalize task reduction.
1758// Called from __kmpc_end_taskgroup()
Jonathan Peyton30419822017-05-12 18:01:32 +00001759static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
1760 kmp_int32 nth = th->th.th_team_nproc;
1761 KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
1762 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
1763 kmp_int32 num = tg->reduce_num_data;
1764 for (int i = 0; i < num; ++i) {
1765 void *sh_data = arr[i].reduce_shar;
1766 void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
1767 void (*f_comb)(void *, void *) =
1768 (void (*)(void *, void *))(arr[i].reduce_comb);
1769 if (!arr[i].flags.lazy_priv) {
1770 void *pr_data = arr[i].reduce_priv;
1771 size_t size = arr[i].reduce_size;
1772 for (int j = 0; j < nth; ++j) {
1773 void *priv_data = (char *)pr_data + j * size;
1774 f_comb(sh_data, priv_data); // combine results
1775 if (f_fini)
1776 f_fini(priv_data); // finalize if needed
1777 }
1778 } else {
1779 void **pr_data = (void **)(arr[i].reduce_priv);
1780 for (int j = 0; j < nth; ++j) {
1781 if (pr_data[j] != NULL) {
1782 f_comb(sh_data, pr_data[j]); // combine results
1783 if (f_fini)
1784 f_fini(pr_data[j]); // finalize if needed
1785 __kmp_free(pr_data[j]);
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001786 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001787 }
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001788 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001789 __kmp_free(arr[i].reduce_priv);
1790 }
1791 __kmp_thread_free(th, arr);
1792 tg->reduce_data = NULL;
1793 tg->reduce_num_data = 0;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001794}
1795#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001796
1797#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001798// __kmpc_taskgroup: Start a new taskgroup
Jonathan Peyton30419822017-05-12 18:01:32 +00001799void __kmpc_taskgroup(ident_t *loc, int gtid) {
1800 kmp_info_t *thread = __kmp_threads[gtid];
1801 kmp_taskdata_t *taskdata = thread->th.th_current_task;
1802 kmp_taskgroup_t *tg_new =
1803 (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
1804 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
1805 tg_new->count = 0;
1806 tg_new->cancel_request = cancel_noreq;
1807 tg_new->parent = taskdata->td_taskgroup;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001808// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
1809#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001810 tg_new->reduce_data = NULL;
1811 tg_new->reduce_num_data = 0;
Andrey Churbanov72ba2102017-02-16 17:49:49 +00001812#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001813 taskdata->td_taskgroup = tg_new;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001814}
1815
// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
// and its descendants are complete.
// While waiting, the thread helps by executing available tasks; afterwards it
// runs any registered task reductions, pops the taskgroup from the current
// task, and frees the descriptor allocated in __kmpc_taskgroup().
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  KMP_DEBUG_ASSERT(taskgroup != NULL);
  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
#if USE_ITT_BUILD
    // For ITT the taskgroup wait is similar to taskwait until we need to
    // distinguish them
    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

// Serial teams normally have nothing to wait for, but with OMP 4.5 proxy
// tasks may still be outstanding even in a serialized team.
#if OMP_45_ENABLED
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks))
#else
    if (!taskdata->td_flags.team_serial)
#endif
    {
      // Spin on the group's outstanding-task count, executing other tasks
      // while it is nonzero.
      kmp_flag_32 flag(RCAST(kmp_uint32 *, &taskgroup->count), 0U);
      while (TCR_4(taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }

#if USE_ITT_BUILD
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
  }
  KMP_DEBUG_ASSERT(taskgroup->count == 0);

// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
#if OMP_45_ENABLED
  if (taskgroup->reduce_data != NULL) // need to reduce?
    __kmp_task_reduction_fini(thread, taskgroup);
#endif
  // Restore parent taskgroup for the current task
  taskdata->td_taskgroup = taskgroup->parent;
  __kmp_thread_free(thread, taskgroup);

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));
  ANNOTATE_HAPPENS_AFTER(taskdata);
}
1873#endif
1874
Jim Cownie5e8470a2013-09-27 10:38:44 +00001875// __kmp_remove_my_task: remove a task from my own deque
Jonathan Peyton30419822017-05-12 18:01:32 +00001876static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
1877 kmp_task_team_t *task_team,
1878 kmp_int32 is_constrained) {
1879 kmp_task_t *task;
1880 kmp_taskdata_t *taskdata;
1881 kmp_thread_data_t *thread_data;
1882 kmp_uint32 tail;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001883
Jonathan Peyton30419822017-05-12 18:01:32 +00001884 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
1885 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
1886 NULL); // Caller should check this condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001887
Jonathan Peyton30419822017-05-12 18:01:32 +00001888 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001889
Jonathan Peyton30419822017-05-12 18:01:32 +00001890 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1891 gtid, thread_data->td.td_deque_ntasks,
1892 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001893
Jonathan Peyton30419822017-05-12 18:01:32 +00001894 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
1895 KA_TRACE(10,
1896 ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
1897 "ntasks=%d head=%u tail=%u\n",
1898 gtid, thread_data->td.td_deque_ntasks,
1899 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1900 return NULL;
1901 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001902
Jonathan Peyton30419822017-05-12 18:01:32 +00001903 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
1904
1905 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
1906 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
1907 KA_TRACE(10,
1908 ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
1909 "ntasks=%d head=%u tail=%u\n",
1910 gtid, thread_data->td.td_deque_ntasks,
1911 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1912 return NULL;
1913 }
1914
1915 tail = (thread_data->td.td_deque_tail - 1) &
1916 TASK_DEQUE_MASK(thread_data->td); // Wrap index.
1917 taskdata = thread_data->td.td_deque[tail];
1918
1919 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
1920 // we need to check if the candidate obeys task scheduling constraint:
1921 // only child of current task can be scheduled
1922 kmp_taskdata_t *current = thread->th.th_current_task;
1923 kmp_int32 level = current->td_level;
1924 kmp_taskdata_t *parent = taskdata->td_parent;
1925 while (parent != current && parent->td_level > level) {
1926 parent = parent->td_parent; // check generation up to the level of the
1927 // current task
1928 KMP_DEBUG_ASSERT(parent != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001930 if (parent != current) {
1931 // If the tail task is not a child, then no other child can appear in the
1932 // deque.
1933 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
1934 KA_TRACE(10,
1935 ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
1936 "ntasks=%d head=%u tail=%u\n",
1937 gtid, thread_data->td.td_deque_ntasks,
1938 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1939 return NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001940 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001941 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001942
Jonathan Peyton30419822017-05-12 18:01:32 +00001943 thread_data->td.td_deque_tail = tail;
1944 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001945
Jonathan Peyton30419822017-05-12 18:01:32 +00001946 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001947
Jonathan Peyton30419822017-05-12 18:01:32 +00001948 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: "
1949 "ntasks=%d head=%u tail=%u\n",
1950 gtid, taskdata, thread_data->td.td_deque_ntasks,
1951 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952
Jonathan Peyton30419822017-05-12 18:01:32 +00001953 task = KMP_TASKDATA_TO_TASK(taskdata);
1954 return task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955}
1956
Jim Cownie5e8470a2013-09-27 10:38:44 +00001957// __kmp_steal_task: remove a task from another thread's deque
1958// Assume that calling thread has already checked existence of
1959// task_team thread_data before calling this routine.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001960static kmp_task_t *__kmp_steal_task(kmp_info_t *victim, kmp_int32 gtid,
1961 kmp_task_team_t *task_team,
1962 volatile kmp_int32 *unfinished_threads,
1963 int *thread_finished,
1964 kmp_int32 is_constrained) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001965 kmp_task_t *task;
1966 kmp_taskdata_t *taskdata;
1967 kmp_thread_data_t *victim_td, *threads_data;
1968 kmp_int32 victim_tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969
Jonathan Peyton30419822017-05-12 18:01:32 +00001970 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971
Jonathan Peyton30419822017-05-12 18:01:32 +00001972 threads_data = task_team->tt.tt_threads_data;
1973 KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
Jim Cownie5e8470a2013-09-27 10:38:44 +00001974
Jonathan Peyton30419822017-05-12 18:01:32 +00001975 victim_tid = victim->th.th_info.ds.ds_tid;
1976 victim_td = &threads_data[victim_tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001977
Jonathan Peyton30419822017-05-12 18:01:32 +00001978 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
1979 "task_team=%p ntasks=%d "
1980 "head=%u tail=%u\n",
1981 gtid, __kmp_gtid_from_thread(victim), task_team,
1982 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1983 victim_td->td.td_deque_tail));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001984
Jonathan Peyton30419822017-05-12 18:01:32 +00001985 if ((TCR_4(victim_td->td.td_deque_ntasks) ==
1986 0) || // Caller should not check this condition
1987 (TCR_PTR(victim->th.th_task_team) !=
1988 task_team)) // GEH: why would this happen?
1989 {
1990 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
1991 "task_team=%p "
Jim Cownie5e8470a2013-09-27 10:38:44 +00001992 "ntasks=%d head=%u tail=%u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00001993 gtid, __kmp_gtid_from_thread(victim), task_team,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
Jonathan Peyton30419822017-05-12 18:01:32 +00001995 victim_td->td.td_deque_tail));
1996 return NULL;
1997 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998
Jonathan Peyton30419822017-05-12 18:01:32 +00001999 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2000
2001 // Check again after we acquire the lock
2002 if ((TCR_4(victim_td->td.td_deque_ntasks) == 0) ||
2003 (TCR_PTR(victim->th.th_task_team) !=
2004 task_team)) // GEH: why would this happen?
2005 {
2006 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2007 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
2008 "task_team=%p "
2009 "ntasks=%d head=%u tail=%u\n",
2010 gtid, __kmp_gtid_from_thread(victim), task_team,
2011 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2012 victim_td->td.td_deque_tail));
2013 return NULL;
2014 }
2015
2016 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2017
2018 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2019 if (is_constrained) {
2020 // we need to check if the candidate obeys task scheduling constraint:
2021 // only descendant of current task can be scheduled
2022 kmp_taskdata_t *current = __kmp_threads[gtid]->th.th_current_task;
2023 kmp_int32 level = current->td_level;
2024 kmp_taskdata_t *parent = taskdata->td_parent;
2025 while (parent != current && parent->td_level > level) {
2026 parent = parent->td_parent; // check generation up to the level of the
2027 // current task
2028 KMP_DEBUG_ASSERT(parent != NULL);
2029 }
2030 if (parent != current) {
2031 // If the head task is not a descendant of the current task then do not
2032 // steal it. No other task in victim's deque can be a descendant of the
2033 // current task.
2034 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2035 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from "
2036 "T#%d: task_team=%p "
2037 "ntasks=%d head=%u tail=%u\n",
2038 gtid,
2039 __kmp_gtid_from_thread(threads_data[victim_tid].td.td_thr),
2040 task_team, victim_td->td.td_deque_ntasks,
2041 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2042 return NULL;
2043 }
2044 }
2045 // Bump head pointer and Wrap.
2046 victim_td->td.td_deque_head =
2047 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
2048 if (*thread_finished) {
2049 // We need to un-mark this victim as a finished victim. This must be done
2050 // before releasing the lock, or else other threads (starting with the
2051 // master victim) might be prematurely released from the barrier!!!
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00002052 kmp_int32 count;
Jonathan Peyton30419822017-05-12 18:01:32 +00002053
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00002054 count = KMP_TEST_THEN_INC32(unfinished_threads);
Jonathan Peyton30419822017-05-12 18:01:32 +00002055
2056 KA_TRACE(
2057 20,
2058 ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
2059 gtid, count + 1, task_team));
2060
2061 *thread_finished = FALSE;
2062 }
2063 TCW_4(victim_td->td.td_deque_ntasks,
2064 TCR_4(victim_td->td.td_deque_ntasks) - 1);
2065
Jonathan Peyton30419822017-05-12 18:01:32 +00002066 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2067
2068 KMP_COUNT_BLOCK(TASK_stolen);
2069 KA_TRACE(
2070 10,
2071 ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
2072 "ntasks=%d head=%u tail=%u\n",
2073 gtid, taskdata, __kmp_gtid_from_thread(victim), task_team,
2074 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2075 victim_td->td.td_deque_tail));
2076
2077 task = KMP_TASKDATA_TO_TASK(taskdata);
2078 return task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002079}
2080
// __kmp_execute_tasks_template: Choose and execute tasks until either the
// condition is satisfied (return true) or there are none left (return false).
//
// final_spin is TRUE if this is the spin at the release barrier.
// thread_finished indicates whether the thread is finished executing all
// the tasks it has on its deque, and is at the release barrier.
// spinner is the location on which to spin.
// spinner == NULL means only execute a single task and return.
// checker is the value to check to terminate the spin.
// NOTE(review): spinner/checker appear to be encapsulated in the flag
// object (flag->done_check()); flag == NULL means execute without a
// termination condition.
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
  kmp_info_t *other_thread;
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  volatile kmp_int32 *unfinished_threads;
  // victim == -2 means "no victim chosen yet"; -1 means "no last-stolen
  // victim recorded".
  kmp_int32 nthreads, victim = -2, use_own_tasks = 1, new_victim = 0,
                      tid = thread->th.th_info.ds.ds_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);

  if (task_team == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  nthreads = task_team->tt.tt_nproc;
  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
#if OMP_45_ENABLED
  KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
#else
  KMP_DEBUG_ASSERT(nthreads > 1);
#endif
  KMP_DEBUG_ASSERT(TCR_4(*unfinished_threads) >= 0);

  while (1) { // Outer loop keeps trying to find tasks in case of single thread
    // getting tasks from target constructs
    while (1) { // Inner loop to find a task and execute it
      task = NULL;
      if (use_own_tasks) { // check on own queue first
        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
      }
      if ((task == NULL) && (nthreads > 1)) { // Steal a task
        int asleep = 1;
        use_own_tasks = 0;
        // Try to steal from the last place I stole from successfully.
        if (victim == -2) { // haven't stolen anything yet
          victim = threads_data[tid].td.td_deque_last_stolen;
          if (victim !=
              -1) // if we have a last stolen from victim, get the thread
            other_thread = threads_data[victim].td.td_thr;
        }
        if (victim != -1) { // found last victim
          asleep = 0;
        } else if (!new_victim) { // no recent steals and we haven't already
          // used a new victim; select a random thread
          do { // Find a different thread to steal work from.
            // Pick a random thread. Initial plan was to cycle through all the
            // threads, and only return if we tried to steal from every thread,
            // and failed. Arch says that's not such a great idea.
            victim = __kmp_get_random(thread) % (nthreads - 1);
            if (victim >= tid) {
              ++victim; // Adjusts random distribution to exclude self
            }
            // Found a potential victim
            other_thread = threads_data[victim].td.td_thr;
            // There is a slight chance that __kmp_enable_tasking() did not wake
            // up all threads waiting at the barrier. If victim is sleeping,
            // then wake it up. Since we were going to pay the cache miss
            // penalty for referencing another thread's kmp_info_t struct
            // anyway,
            // the check shouldn't cost too much performance at this point. In
            // extra barrier mode, tasks do not sleep at the separate tasking
            // barrier, so this isn't a problem.
            asleep = 0;
            if ((__kmp_tasking_mode == tskm_task_teams) &&
                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
                (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
                 NULL)) {
              asleep = 1;
              __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
                                        other_thread->th.th_sleep_loc);
              // A sleeping thread should not have any tasks on its queue.
              // There is a slight possibility that it resumes, steals a task
              // from another thread, which spawns more tasks, all in the time
              // that it takes this thread to check => don't write an assertion
              // that the victim's queue is empty. Try stealing from a
              // different thread.
            }
          } while (asleep);
        }

        if (!asleep) {
          // We have a victim to try to steal from
          task = __kmp_steal_task(other_thread, gtid, task_team,
                                  unfinished_threads, thread_finished,
                                  is_constrained);
        }
        if (task != NULL) { // set last stolen to victim
          if (threads_data[tid].td.td_deque_last_stolen != victim) {
            threads_data[tid].td.td_deque_last_stolen = victim;
            // The pre-refactored code did not try more than 1 successful new
            // victim, unless the last one generated more local tasks;
            // new_victim keeps track of this
            new_victim = 1;
          }
        } else { // No tasks found; unset last_stolen
          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
          victim = -2; // no successful victim found
        }
      }

      if (task == NULL) // break out of tasking loop
        break;

// Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
          // get the object reliably
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
      // If this thread is only partway through the barrier and the condition is
      // met, then return now, so that the barrier gather/release pattern can
      // proceed. If this thread is in the last spin loop in the barrier,
      // waiting to be released, we know that the termination condition will not
      // be satisfied, so don't waste any cycles checking it.
      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      // Yield before executing next task
      KMP_YIELD(__kmp_library == library_throughput);
      // If execution of a stolen task results in more tasks being placed on our
      // run queue, reset use_own_tasks
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    }

// The task source has been exhausted. If in final spin loop of barrier, check
// if termination condition is satisfied.
#if OMP_45_ENABLED
    // The work queue may be empty but there might be proxy tasks still
    // executing
    if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
#else
    if (final_spin)
#endif
    {
      // First, decrement the #unfinished threads, if that has not already been
      // done. This decrement might be to the spin location, and result in the
      // termination condition being satisfied.
      if (!*thread_finished) {
        kmp_int32 count;

        count = KMP_TEST_THEN_DEC32(unfinished_threads) - 1;
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }

      // It is now unsafe to reference thread->th.th_team !!!
      // Decrementing task_team->tt.tt_unfinished_threads can allow the master
      // thread to pass through the barrier, where it might reset each thread's
      // th.th_team field for the next parallel region. If we can steal more
      // work, we know that this has not happened yet.
      if (flag != NULL && flag->done_check()) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
    }

    // If this thread's task team is NULL, master has recognized that there are
    // no more tasks; bail out
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }

#if OMP_45_ENABLED
    // We could be getting tasks from target constructs; if this is the only
    // thread, keep trying to execute tasks from own queue
    if (nthreads == 1)
      use_own_tasks = 1;
    else
#endif
    {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
      return FALSE;
    }
  }
}
2309
// Instantiation of __kmp_execute_tasks_template for 32-bit spin flags.
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
2318
// Instantiation of __kmp_execute_tasks_template for 64-bit spin flags.
int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
2327
// Instantiation of __kmp_execute_tasks_template for oncore (hierarchical
// barrier) spin flags.
int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
2336
Jim Cownie5e8470a2013-09-27 10:38:44 +00002337// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2338// next barrier so they can assist in executing enqueued tasks.
2339// First thread in allocates the task team atomically.
Jonathan Peyton30419822017-05-12 18:01:32 +00002340static void __kmp_enable_tasking(kmp_task_team_t *task_team,
2341 kmp_info_t *this_thr) {
2342 kmp_thread_data_t *threads_data;
2343 int nthreads, i, is_init_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002344
Jonathan Peyton30419822017-05-12 18:01:32 +00002345 KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
2346 __kmp_gtid_from_thread(this_thr)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347
Jonathan Peyton30419822017-05-12 18:01:32 +00002348 KMP_DEBUG_ASSERT(task_team != NULL);
2349 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002350
Jonathan Peyton30419822017-05-12 18:01:32 +00002351 nthreads = task_team->tt.tt_nproc;
2352 KMP_DEBUG_ASSERT(nthreads > 0);
2353 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354
Jonathan Peyton30419822017-05-12 18:01:32 +00002355 // Allocate or increase the size of threads_data if necessary
2356 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002357
Jonathan Peyton30419822017-05-12 18:01:32 +00002358 if (!is_init_thread) {
2359 // Some other thread already set up the array.
2360 KA_TRACE(
2361 20,
2362 ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2363 __kmp_gtid_from_thread(this_thr)));
2364 return;
2365 }
2366 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2367 KMP_DEBUG_ASSERT(threads_data != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368
Jonathan Peyton30419822017-05-12 18:01:32 +00002369 if ((__kmp_tasking_mode == tskm_task_teams) &&
2370 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
2371 // Release any threads sleeping at the barrier, so that they can steal
2372 // tasks and execute them. In extra barrier mode, tasks do not sleep
2373 // at the separate tasking barrier, so this isn't a problem.
2374 for (i = 0; i < nthreads; i++) {
2375 volatile void *sleep_loc;
2376 kmp_info_t *thread = threads_data[i].td.td_thr;
2377
2378 if (i == this_thr->th.th_info.ds.ds_tid) {
2379 continue;
2380 }
2381 // Since we haven't locked the thread's suspend mutex lock at this
2382 // point, there is a small window where a thread might be putting
2383 // itself to sleep, but hasn't set the th_sleep_loc field yet.
2384 // To work around this, __kmp_execute_tasks_template() periodically checks
2385 // see if other threads are sleeping (using the same random mechanism that
2386 // is used for task stealing) and awakens them if they are.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00002387 if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
2388 NULL) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002389 KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2390 __kmp_gtid_from_thread(this_thr),
2391 __kmp_gtid_from_thread(thread)));
2392 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2393 } else {
2394 KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2395 __kmp_gtid_from_thread(this_thr),
2396 __kmp_gtid_from_thread(thread)));
2397 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002398 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002399 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002400
Jonathan Peyton30419822017-05-12 18:01:32 +00002401 KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
2402 __kmp_gtid_from_thread(this_thr)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002403}
2404
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002405/* // TODO: Check the comment consistency
Jim Cownie5e8470a2013-09-27 10:38:44 +00002406 * Utility routines for "task teams". A task team (kmp_task_t) is kind of
2407 * like a shadow of the kmp_team_t data struct, with a different lifetime.
2408 * After a child * thread checks into a barrier and calls __kmp_release() from
2409 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2410 * longer assume that the kmp_team_t structure is intact (at any moment, the
2411 * master thread may exit the barrier code and free the team data structure,
2412 * and return the threads to the thread pool).
2413 *
2414 * This does not work with the the tasking code, as the thread is still
2415 * expected to participate in the execution of any tasks that may have been
2416 * spawned my a member of the team, and the thread still needs access to all
2417 * to each thread in the team, so that it can steal work from it.
2418 *
 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 * counting mechanism, and is allocated by the master thread before calling
 * __kmp_<barrier_kind>_release, and then is released by the last thread to
 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
 * of the kmp_task_team_t structs for consecutive barriers can overlap
 * (and will, unless the master thread is the last thread to exit the barrier
 * release phase, which is not typical).
2426 *
2427 * The existence of such a struct is useful outside the context of tasking,
2428 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2429 * so that any performance differences show up when comparing the 2.5 vs. 3.0
2430 * libraries.
2431 *
 * We currently use the existence of the threads array as an indicator that
 * tasks were spawned since the last barrier. If the structure is to be
 * useful outside the context of tasking, then this will have to change, but
 * not setting the field minimizes the performance impact of tasking on
 * barriers, when no explicit tasks were spawned (pushed, actually).
2437 */
2438
// Free list of recycled kmp_task_team_t structures. All accesses (push in
// __kmp_free_task_team, pop in __kmp_allocate_task_team, drain in
// __kmp_reap_task_teams) are protected by __kmp_task_team_lock below.
static kmp_task_team_t *__kmp_free_task_teams =
    NULL; // Free list for task_team data structures
// Lock for task team data structures
static kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002444
// __kmp_alloc_task_deque:
// Allocates a task deque for a particular thread, and initialize the necessary
// data structures relating to the deque. This only happens once per thread
// per task team since task teams are recycled. No lock is needed during
// allocation since each thread allocates its own deque.
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  // The deque must not have been allocated already for this thread_data.
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none"
  thread_data->td.td_deque_last_stolen = -1;

  // Counters/indices are expected to be zeroed already (fresh or recycled).
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for task deque, and zero the deque
  // Cannot use __kmp_thread_calloc() because threads not around for
  // kmp_reap_task_team( ).
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
2473
// __kmp_realloc_task_deque:
// Re-allocates a task deque for a particular thread, copies the content from
// the old deque and adjusts the necessary data structures relating to the
// deque. This operation must be done with the deque_lock being held.
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  kmp_int32 new_size = 2 * size; // capacity doubles on every grow

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  // Unwrap the circular buffer: walk from the old head, wrapping via the
  // mask, so the oldest entry lands at index 0 of the new deque.
  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  // The old deque was full (that is why we grew), so after the copy the new
  // deque holds 'size' entries: head at 0, tail at the old capacity.
  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002502
Jim Cownie5e8470a2013-09-27 10:38:44 +00002503// __kmp_free_task_deque:
Jonathan Peyton30419822017-05-12 18:01:32 +00002504// Deallocates a task deque for a particular thread. Happens at library
2505// deallocation so don't need to reset all thread data fields.
2506static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
2507 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002508
Jonathan Peyton30419822017-05-12 18:01:32 +00002509 if (thread_data->td.td_deque != NULL) {
2510 TCW_4(thread_data->td.td_deque_ntasks, 0);
2511 __kmp_free(thread_data->td.td_deque);
2512 thread_data->td.td_deque = NULL;
2513 }
2514 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002515
2516#ifdef BUILD_TIED_TASK_STACK
Jonathan Peyton30419822017-05-12 18:01:32 +00002517 // GEH: Figure out what to do here for td_susp_tied_tasks
2518 if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
2519 __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
2520 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002521#endif // BUILD_TIED_TASK_STACK
2522}
2523
// __kmp_realloc_task_threads_data:
// Allocates a threads_data array for a task team, either by allocating an
// initial array or enlarging an existing array. Only the first thread to get
// the lock allocs or enlarges the array and re-initializes the array elements.
// That thread returns "TRUE", the rest return "FALSE".
// Assumes that the new array size is given by task_team -> tt.tt_nproc.
// The current size is given by task_team -> tt.tt_max_threads.
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
  kmp_thread_data_t **threads_data_p;
  kmp_int32 nthreads, maxthreads;
  int is_init_thread = FALSE; // TRUE only for the thread that initializes

  if (TCR_4(task_team->tt.tt_found_tasks)) {
    // Already reallocated and initialized.
    return FALSE;
  }

  threads_data_p = &task_team->tt.tt_threads_data;
  nthreads = task_team->tt.tt_nproc;
  maxthreads = task_team->tt.tt_max_threads;

  // All threads must lock when they encounter the first task of the implicit
  // task region to make sure threads_data fields are (re)initialized before
  // used.
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  if (!TCR_4(task_team->tt.tt_found_tasks)) { // re-check under the lock
    // first thread to enable tasking
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
    if (maxthreads < nthreads) {

      if (*threads_data_p != NULL) {
        // Grow an existing array, preserving its contents.
        kmp_thread_data_t *old_data = *threads_data_p;
        kmp_thread_data_t *new_data = NULL;

        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // Reallocate threads_data to have more elements than current array
        // Cannot use __kmp_thread_realloc() because threads not around for
        // kmp_reap_task_team( ). Note all new array entries are initialized
        // to zero by __kmp_allocate().
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        // copy old data to new data
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
        // GEH: Figure out if this is the right thing to do
        for (i = maxthreads; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
        // Install the new data and free the old data
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        // Make the initial allocate for threads_data array, and zero entries
        // Cannot use __kmp_thread_calloc() because threads not around for
        // kmp_reap_task_team( ).
        ANNOTATE_IGNORE_WRITES_BEGIN();
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
        // GEH: Figure out if this is the right thing to do
        for (i = 0; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
      }
      task_team->tt.tt_max_threads = nthreads;
    } else {
      // If array has (more than) enough elements, go ahead and use it
      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
    }

    // initialize threads_data pointers back to thread_info structures
    for (i = 0; i < nthreads; i++) {
      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
      thread_data->td.td_thr = team->t.t_threads[i];

      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        // The last stolen field survives across teams / barrier, and the number
        // of threads may have changed. It's possible (likely?) that a new
        // parallel region will exhibit the same behavior as previous region.
        thread_data->td.td_deque_last_stolen = -1;
      }
    }

    // Fence so the initialization above is visible before tt_found_tasks
    // flips to TRUE and releases early-exit readers of this function.
    KMP_MB();
    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
2633
Jim Cownie5e8470a2013-09-27 10:38:44 +00002634// __kmp_free_task_threads_data:
2635// Deallocates a threads_data array for a task team, including any attached
2636// tasking deques. Only occurs at library shutdown.
Jonathan Peyton30419822017-05-12 18:01:32 +00002637static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
2638 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
2639 if (task_team->tt.tt_threads_data != NULL) {
2640 int i;
2641 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
2642 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002643 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002644 __kmp_free(task_team->tt.tt_threads_data);
2645 task_team->tt.tt_threads_data = NULL;
2646 }
2647 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002648}
2649
// __kmp_allocate_task_team:
// Allocates a task team associated with a specific team, taking it from
// the global task team free list if possible. Also initializes data
// structures.
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;
  int nthreads;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Take a task team from the task team pool
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    if (__kmp_free_task_teams != NULL) { // re-check under the lock
      task_team = __kmp_free_task_teams;
      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }

  if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team if one is not available.
    // Cannot use __kmp_thread_malloc() because threads not around for
    // kmp_reap_task_team( ).
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
    // AC: __kmp_allocate zeroes returned memory
    // task_team -> tt.tt_threads_data = NULL;
    // task_team -> tt.tt_max_threads = 0;
    // task_team -> tt.tt_next = NULL;
  }

  // (Re)initialize fields regardless of whether this is fresh or recycled.
  TCW_4(task_team->tt.tt_found_tasks, FALSE);
#if OMP_45_ENABLED
  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#endif
  task_team->tt.tt_nproc = nthreads = team->t.t_nproc;

  TCW_4(task_team->tt.tt_unfinished_threads, nthreads);
  TCW_4(task_team->tt.tt_active, TRUE);

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                task_team->tt.tt_unfinished_threads));
  return task_team;
}
2703
Jim Cownie5e8470a2013-09-27 10:38:44 +00002704// __kmp_free_task_team:
2705// Frees the task team associated with a specific thread, and adds it
2706// to the global task team free list.
Jonathan Peyton30419822017-05-12 18:01:32 +00002707void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
2708 KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
2709 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002710
Jonathan Peyton30419822017-05-12 18:01:32 +00002711 // Put task team back on free list
2712 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002713
Jonathan Peyton30419822017-05-12 18:01:32 +00002714 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
2715 task_team->tt.tt_next = __kmp_free_task_teams;
2716 TCW_PTR(__kmp_free_task_teams, task_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002717
Jonathan Peyton30419822017-05-12 18:01:32 +00002718 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002719}
2720
// __kmp_reap_task_teams:
// Free all the task teams on the task team free list.
// Should only be done during library shutdown.
// Cannot do anything that needs a thread structure or gtid since they are
// already gone.
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Free all task_teams on the free list
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    while ((task_team = __kmp_free_task_teams) != NULL) {
      // Unlink the head of the list before releasing it.
      __kmp_free_task_teams = task_team->tt.tt_next;
      task_team->tt.tt_next = NULL;

      // Free threads_data if necessary
      if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}
2745
// __kmp_wait_to_unref_task_teams:
// Some threads could still be in the fork barrier release code, possibly
// trying to steal tasks. Wait for each thread to unreference its task team.
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  int done; // TRUE once no pooled thread still references a task team

  KMP_INIT_YIELD(spins);

  for (;;) {
    done = TRUE;

    // TODO: GEH - this may be wrong because some sync would be necessary
    // in case threads are added to the pool during the traversal. Need to
    // verify that lock for thread pool is held when calling this routine.
    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
         thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
      DWORD exit_val;
#endif
      if (TCR_PTR(thread->th.th_task_team) == NULL) {
        // This thread has already dropped its task team reference.
        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                      __kmp_gtid_from_thread(thread)));
        continue;
      }
#if KMP_OS_WINDOWS
      // TODO: GEH - add this check for Linux* OS / OS X* as well?
      if (!__kmp_is_thread_alive(thread, &exit_val)) {
        // Dead thread can never unreference; clear the pointer on its behalf.
        thread->th.th_task_team = NULL;
        continue;
      }
#endif

      done = FALSE; // Because th_task_team pointer is not NULL for this thread

      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));

      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        volatile void *sleep_loc;
        // If the thread is sleeping, awaken it.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
        }
      }
    }
    if (done) {
      break;
    }

    // If we are oversubscribed, or have waited a bit (and library mode is
    // throughput), yield. Pause is in the following code.
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
  }
}
2809
// __kmp_task_team_setup: Create a task_team for the current team, but use
// an already created, unused one if it already exists.
// Two task teams exist per kmp_team_t (indexed by th_task_state parity); this
// routine ensures both slots are populated and the "other" (next-region) slot
// is reinitialized if it was recycled.
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // If this task_team hasn't been created yet, allocate it. It will be used in
  // the region after the next.
  // If it exists, it is the current task team and shouldn't be touched yet as
  // it may still be in use.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
      (always || team->t.t_nproc > 1)) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
                  "for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state],
                  ((team != NULL) ? team->t.t_id : -1),
                  this_thr->th.th_task_state));
  }

  // After threads exit the release, they will call sync, and then point to this
  // other task_team; make sure it is allocated and properly initialized. As
  // threads spin in the barrier release phase, they will continue to use the
  // previous task_team struct(above), until they receive the signal to stop
  // checking for tasks (they can't safely reference the kmp_team_t struct,
  // which could be reallocated by the master thread). No task teams are formed
  // for serialized teams.
  if (team->t.t_nproc > 1) {
    int other_team = 1 - this_thr->th.th_task_state; // opposite parity slot
    if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
      team->t.t_task_team[other_team] =
          __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
                    "task_team %p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    } else { // Leave the old task team struct in place for the upcoming region;
      // adjust as needed
      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
      if (!task_team->tt.tt_active ||
          team->t.t_nproc != task_team->tt.tt_nproc) {
        // Recycled task team was deactivated or sized for a different team:
        // reset its counters/flags for the upcoming region.
        TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
        TCW_4(task_team->tt.tt_found_tasks, FALSE);
#if OMP_45_ENABLED
        TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#endif
        TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc);
        TCW_4(task_team->tt.tt_active, TRUE);
      }
      // if team size has changed, the first thread to enable tasking will
      // realloc threads_data if necessary
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
                    "%p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    }
  }
}
2871
Jim Cownie5e8470a2013-09-27 10:38:44 +00002872// __kmp_task_team_sync: Propagation of task team data from team to threads
2873// which happens just after the release phase of a team barrier. This may be
2874// called by any thread, but only for teams with # threads > 1.
Jonathan Peyton30419822017-05-12 18:01:32 +00002875void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
2876 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002877
Jonathan Peyton30419822017-05-12 18:01:32 +00002878 // Toggle the th_task_state field, to switch which task_team this thread
2879 // refers to
2880 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2881 // It is now safe to propagate the task team pointer from the team struct to
2882 // the current thread.
2883 TCW_PTR(this_thr->th.th_task_team,
2884 team->t.t_task_team[this_thr->th.th_task_state]);
2885 KA_TRACE(20,
2886 ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
2887 "%p from Team #%d (parity=%d)\n",
2888 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
2889 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002890}
2891
// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
// barrier gather phase. Only called by master thread if #threads in team > 1 or
// if proxy tasks were created.
//
// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
// by passing in 0 optionally as the last argument. When wait is zero, master
// thread does not wait for unfinished_threads to reach 0.
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);

  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Worker threads may have dropped through to release phase, but could
      // still be executing tasks. Wait here for tasks to complete. To avoid
      // memory contention, only master thread checks termination condition.
      kmp_flag_32 flag(
          RCAST(volatile kmp_uint32 *, &task_team->tt.tt_unfinished_threads),
          0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    // Deactivate the old task team, so that the worker threads will stop
    // referencing it while spinning.
    KA_TRACE(
        20,
        ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
         "setting active to false, setting local and team's pointer to NULL\n",
         __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_45_ENABLED
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
                     task_team->tt.tt_found_proxy_tasks == TRUE);
    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#else
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
#endif
    TCW_SYNC_4(task_team->tt.tt_active, FALSE);
    // Fence the deactivation before this thread drops its own reference.
    KMP_MB();

    TCW_PTR(this_thr->th.th_task_team, NULL);
  }
}
2940
// __kmp_tasking_barrier:
// This routine may only called when __kmp_tasking_mode == tskm_extra_barrier.
// Internal function to execute all tasks prior to a regular barrier or a join
// barrier. It is a full barrier itself, which unfortunately turns regular
// barriers into double barriers and join barriers into 1 1/2 barriers.
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  // Spin location: count of team threads that have not finished their tasks.
  volatile kmp_uint32 *spin = RCAST(
      volatile kmp_uint32 *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_INIT(spin, (kmp_uint32 *)NULL);
#endif /* USE_ITT_BUILD */
  kmp_flag_32 spin_flag(spin, 0U);
  // Keep executing available tasks until the spin count reaches zero.
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
#if USE_ITT_BUILD
    // TODO: What about itt_sync_obj??
    KMP_FSYNC_SPIN_PREPARE(CCAST(kmp_uint32 *, spin));
#endif /* USE_ITT_BUILD */

    // Bail out if the library is shutting down or aborting.
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE); // GH: We always yield here
  }
#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(kmp_uint32 *, spin));
#endif /* USE_ITT_BUILD */
}
2975
Jonathan Peytondf6818b2016-06-14 17:57:47 +00002976#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00002977
// __kmp_give_task puts a task into a given thread queue if:
// - the queue for that thread was created
// - there's space in that queue
// Because of this, __kmp_push_task needs to check if there's space after
// getting the lock
//
// pass acts as a growth throttle: the deque is only grown (and the task
// accepted) while its current size is below pass * INITIAL_TASK_DEQUE_SIZE;
// otherwise the caller is told to try another thread.
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = taskdata->td_task_team;

  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If task_team is NULL something went really bad...
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  // Target deque is selected by tid (not by the 'thread' argument).
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];

  if (thread_data->td.td_deque == NULL) {
    // There's no queue in this thread, go find another one
    // We're guaranteed that at least one thread has a queue
    KA_TRACE(30,
             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
              tid, taskdata));
    return result;
  }

  // Unlocked fast-path fullness check before paying for the lock.
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));

    // if this deque is bigger than the pass ratio give a chance to another
    // thread
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    __kmp_realloc_task_deque(thread, thread_data);

  } else {

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

    // Re-check under the lock: the deque may have filled up concurrently
    // between the unlocked check above and lock acquisition.
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));

      // if this deque is bigger than the pass ratio give a chance to another
      // thread
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;

      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  // lock is held here, and there is space in the deque

  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));

release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return result;
}
3058
/* The finish of the proxy tasks is divided in two pieces:
   - the top half is the one that can be done from a thread outside the team
   - the bottom half must be run from a thread within the team

   In order to run the bottom half the task gets queued back into one of the
   threads of the team. Once the td_incomplete_child_task counter of the parent
   is decremented the threads can leave the barriers. So, the bottom half needs
   to be queued before the counter is decremented. The top half is therefore
   divided in two parts:
   - things that can be run before queuing the bottom half
   - things that must be run after queuing the bottom half

   This creates a second race as the bottom half can free the task before the
   second top half is executed. To avoid this we use the
   td_incomplete_child_task of the proxy task to synchronize the top and bottom
   half. */

// First part of the top half (safe on any thread, even outside the team):
// marks the proxy task complete, updates its taskgroup count, and pins the
// task with an imaginary child so the bottom half cannot free it before the
// second top half has run.
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1; // mark the task as completed

  if (taskdata->td_taskgroup)
    KMP_TEST_THEN_DEC32(&taskdata->td_taskgroup->count);

  // Create an imaginary child for this task so the bottom half cannot
  // release the task before we have completed the second top half
  TCI_4(taskdata->td_incomplete_child_tasks);
}
3090
Jonathan Peyton30419822017-05-12 18:01:32 +00003091static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3092 kmp_int32 children = 0;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003093
Jonathan Peyton30419822017-05-12 18:01:32 +00003094 // Predecrement simulated by "- 1" calculation
3095 children =
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00003096 KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
Jonathan Peyton30419822017-05-12 18:01:32 +00003097 KMP_DEBUG_ASSERT(children >= 0);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003098
Jonathan Peyton30419822017-05-12 18:01:32 +00003099 // Remove the imaginary children
3100 TCD_4(taskdata->td_incomplete_child_tasks);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003101}
3102
// Bottom half of proxy-task completion; must run on a thread of the task's
// team. Waits until the second top half has finished, then releases the
// task's dependences and frees the task (and eligible ancestors).
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
                   1); // top half must run before bottom half

  // We need to wait to make sure the top half is finished
  // Spinning here should be ok as this should happen quickly
  // (the second top half removes the imaginary child, driving this counter
  // to zero).
  while (TCR_4(taskdata->td_incomplete_child_tasks) > 0)
    ;

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
3119
3120/*!
3121@ingroup TASKING
3122@param gtid Global Thread ID of encountering thread
3123@param ptask Task which execution is completed
3124
Jonathan Peyton30419822017-05-12 18:01:32 +00003125Execute the completation of a proxy task from a thread of that is part of the
3126team. Run first and bottom halves directly.
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003127*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003128void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
3129 KMP_DEBUG_ASSERT(ptask != NULL);
3130 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3131 KA_TRACE(
3132 10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
3133 gtid, taskdata));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003134
Jonathan Peyton30419822017-05-12 18:01:32 +00003135 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003136
Jonathan Peyton30419822017-05-12 18:01:32 +00003137 __kmp_first_top_half_finish_proxy(taskdata);
3138 __kmp_second_top_half_finish_proxy(taskdata);
3139 __kmp_bottom_half_finish_proxy(gtid, ptask);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003140
Jonathan Peyton30419822017-05-12 18:01:32 +00003141 KA_TRACE(10,
3142 ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
3143 gtid, taskdata));
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003144}
3145
/*!
@ingroup TASKING
@param ptask Task which execution is completed

Execute the completion of a proxy task from a thread that could not belong to
the team.
*/
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);

  // Enqueue task to complete bottom half completion from a thread within the
  // corresponding team
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;

  // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
  // but we cannot use __kmp_get_random here
  kmp_int32 start_k = 0;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // For now we're just linearly trying to find a thread
    // NOTE(review): 'thread' is team->t.t_threads[k] captured BEFORE the
    // increment, while the tid passed to __kmp_give_task below is the
    // already-incremented k — so the two arguments refer to different
    // threads. Confirm this mismatch is intended.
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;

    // we did a full pass through all the threads: double the pass ratio so
    // progressively larger deques become acceptable targets
    if (k == start_k)
      pass = pass << 1;

  } while (!__kmp_give_task(thread, k, ptask, pass));

  __kmp_second_top_half_finish_proxy(taskdata);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
3196
// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
// for taskloop
//
// thread: allocating thread
// task_src: pointer to source task to be duplicated
// returns: a pointer to the allocated kmp_task_t structure (task).
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
  taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
                   TASK_FULL); // it should not be proxy task
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  task_size = taskdata_src->td_size_alloc;

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif /* USE_FAST_MEMORY */
  // Bulk-copy the whole source block; fields that must differ are patched
  // individually below.
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Initialize new task (only specific fields not affected by memcpy)
  taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need setup shareds pointer
    // shareds live inside the same allocation; recompute the pointer at the
    // same byte offset within the new block, and verify its alignment.
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_taskgroup =
      parent_task
          ->td_taskgroup; // task inherits the taskgroup from the parent task

  // Only need to keep track of child task counts if team parallel and tasking
  // not serialized
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_TEST_THEN_INC32(&parent_task->td_taskgroup->count);
    // Only need to keep track of allocated child tasks for explicit tasks since
    // implicit not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
      KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks);
  }

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid,
                       (void *)task->routine);
#endif
  return task;
}
3265
// Routine optionally generated by the compiler for setting the lastprivate flag
// and calling needed constructors for private/firstprivate objects
// (used to form taskloop tasks from pattern task)
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

// __kmp_taskloop_linear: Start tasks of the taskloop linearly
//
// loc Source location information
// gtid Global thread ID
// task Task with whole loop iteration range
// lb Pointer to loop lower bound
// ub Pointer to loop upper bound
// st Loop stride
// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
// grainsize Schedule value if specified
// task_dup Tasks duplication routine
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int sched, kmp_uint64 grainsize, void *task_dup) {
  KMP_COUNT_BLOCK(OMP_TASKLOOP);
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 tc; // trip count of the whole loop
  kmp_uint64 lower = *lb; // compiler provides global bounds here
  kmp_uint64 upper = *ub;
  kmp_uint64 i, num_tasks = 0, extras = 0;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;
  size_t lower_offset =
      (char *)lb - (char *)task; // remember offset of lb in the task structure
  size_t upper_offset =
      (char *)ub - (char *)task; // remember offset of ub in the task structure

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish(gtid, task, current_task);
    return;
  }

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, we can choose the default
    // let's try to schedule (team_size*10) tasks
    grainsize = thread->th.th_team_nproc * 10;
  // FALLTHROUGH into case 2: the default value chosen above is interpreted
  // as a num_tasks request.
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1; // too big grainsize requested, adjust values
      grainsize = tc;
      extras = 0;
    } else {
      num_tasks = tc / grainsize;
      grainsize =
          tc /
          num_tasks; // adjust grainsize for balanced distribution of iterations
      extras = tc % num_tasks;
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }
  // Invariant: the first 'extras' tasks get (grainsize+1) iterations, the
  // rest get 'grainsize', covering exactly tc iterations.
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize "
                "%lld, extras %lld\n",
                gtid, num_tasks, grainsize, extras));

  // Main loop, launch num_tasks tasks, assign grainsize iterations each task
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // first extras iterations get bigger chunk (grainsize+1)
    }
    upper = lower + st * chunk_minus_1;
    if (i == num_tasks - 1) {
      // schedule the last task, set lastprivate flag
      lastpriv = 1;
#if KMP_DEBUG
      // The last chunk must end exactly at (or step past) the global upper
      // bound, depending on stride direction.
      if (st == 1)
        KMP_DEBUG_ASSERT(upper == *ub);
      else if (st > 0)
        KMP_DEBUG_ASSERT(upper + st > *ub);
      else
        KMP_DEBUG_ASSERT(upper + st < *ub);
#endif
    }
    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
    // Patch this task's private copy of the loop bounds in place (the bounds
    // live at fixed offsets inside the task structure).
    *(kmp_uint64 *)((char *)next_task + lower_offset) =
        lower; // adjust task-specific bounds
    *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
    if (ptask_dup != NULL)
      ptask_dup(next_task, task,
                lastpriv); // set lastprivate flag, construct firstprivates, etc.
    KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper "
                  "%lld (offsets %p %p)\n",
                  gtid, next_task, lower, upper, lower_offset, upper_offset));
    __kmp_omp_task(gtid, next_task, true); // schedule new task
    lower = upper + st; // adjust lower bound for the next iteration
  }
  // free the pattern task and exit
  __kmp_task_start(gtid, task, current_task);
  // do not execute the pattern task, just do bookkeeping
  __kmp_task_finish(gtid, task, current_task);
}
3397
3398/*!
3399@ingroup TASKING
3400@param loc Source location information
3401@param gtid Global thread ID
3402@param task Task structure
3403@param if_val Value of the if clause
3404@param lb Pointer to loop lower bound
3405@param ub Pointer to loop upper bound
3406@param st Loop stride
3407@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
3408@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3409@param grainsize Schedule value if specified
3410@param task_dup Tasks duplication routine
3411
3412Execute the taskloop construct.
3413*/
Jonathan Peyton30419822017-05-12 18:01:32 +00003414void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3415 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
3416 int sched, kmp_uint64 grainsize, void *task_dup) {
3417 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3418 KMP_DEBUG_ASSERT(task != NULL);
Jonathan Peyton283a2152016-03-02 22:47:51 +00003419
Jonathan Peyton30419822017-05-12 18:01:32 +00003420 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub "
3421 "%lld st %lld, grain %llu(%d)\n",
3422 gtid, taskdata, *lb, *ub, st, grainsize, sched));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003423
Jonathan Peyton30419822017-05-12 18:01:32 +00003424 // check if clause value first
3425 if (if_val == 0) { // if(0) specified, mark task as serial
3426 taskdata->td_flags.task_serial = 1;
3427 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3428 }
3429 if (nogroup == 0) {
3430 __kmpc_taskgroup(loc, gtid);
3431 }
Jonathan Peyton283a2152016-03-02 22:47:51 +00003432
Jonathan Peyton30419822017-05-12 18:01:32 +00003433 if (1 /* AC: use some heuristic here to choose task scheduling method */) {
3434 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, sched, grainsize,
3435 task_dup);
3436 }
Jonathan Peyton283a2152016-03-02 22:47:51 +00003437
Jonathan Peyton30419822017-05-12 18:01:32 +00003438 if (nogroup == 0) {
3439 __kmpc_end_taskgroup(loc, gtid);
3440 }
3441 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
Jonathan Peyton283a2152016-03-02 22:47:51 +00003442}
3443
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003444#endif