/*
 * kmp_taskdeps.cpp
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

//#define KMP_SUPPORT_GRAPH_OUTPUT 1

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#if OMP_40_ENABLED

// TODO: Improve memory allocation? keep a list of pre-allocated structures?
// allocate in blocks? re-use list finished list entries?
// TODO: don't use atomic ref counters for stack-allocated nodes.
// TODO: find an alternate to atomic refs for heap-allocated nodes?
// TODO: Finish graph output support
// TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other
// runtime locks
// TODO: Any ITT support needed?

#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
#endif

Jonathan Peyton30419822017-05-12 18:01:32 +000038static void __kmp_init_node(kmp_depnode_t *node) {
39 node->dn.task = NULL; // set to null initially, it will point to the right
40 // task once dependences have been processed
41 node->dn.successors = NULL;
42 __kmp_init_lock(&node->dn.lock);
Jonathan Peyton61d44f12018-07-09 18:09:25 +000043 KMP_ATOMIC_ST_RLX(&node->dn.nrefs,
44 1); // init creates the first reference to the node
Jim Cownie5e8470a2013-09-27 10:38:44 +000045#ifdef KMP_SUPPORT_GRAPH_OUTPUT
Jonathan Peyton37e2ef52018-07-09 17:36:22 +000046 node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
Jim Cownie5e8470a2013-09-27 10:38:44 +000047#endif
48}
49
// Take one additional reference on a dependence node and return the node,
// so the call can be used inline in an assignment.
static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
  KMP_ATOMIC_INC(&node->dn.nrefs);
  return node;
}
54
Jonathan Peyton30419822017-05-12 18:01:32 +000055static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
56 if (!node)
57 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +000058
Jonathan Peyton37e2ef52018-07-09 17:36:22 +000059 kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
Jonathan Peyton30419822017-05-12 18:01:32 +000060 if (n == 0) {
61 KMP_ASSERT(node->dn.nrefs == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +000062#if USE_FAST_MEMORY
Jonathan Peyton30419822017-05-12 18:01:32 +000063 __kmp_fast_free(thread, node);
Jim Cownie5e8470a2013-09-27 10:38:44 +000064#else
Jonathan Peyton30419822017-05-12 18:01:32 +000065 __kmp_thread_free(thread, node);
Jim Cownie5e8470a2013-09-27 10:38:44 +000066#endif
Jonathan Peyton30419822017-05-12 18:01:32 +000067 }
Jim Cownie5e8470a2013-09-27 10:38:44 +000068}
69
#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid))
#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid))

static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list);

enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };

Jonathan Peyton30419822017-05-12 18:01:32 +000077static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
78 // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
79 // m_num_sets );
80 return ((addr >> 6) ^ (addr >> 2)) % hsize;
Jim Cownie5e8470a2013-09-27 10:38:44 +000081}
82
Jonathan Peyton30419822017-05-12 18:01:32 +000083static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
84 kmp_taskdata_t *current_task) {
85 kmp_dephash_t *h;
Jim Cownie4cc4bb42014-10-07 16:25:50 +000086
Jonathan Peyton30419822017-05-12 18:01:32 +000087 size_t h_size;
Jonathan Peyton7d454512016-01-28 23:10:44 +000088
Jonathan Peyton30419822017-05-12 18:01:32 +000089 if (current_task->td_flags.tasktype == TASK_IMPLICIT)
90 h_size = KMP_DEPHASH_MASTER_SIZE;
91 else
92 h_size = KMP_DEPHASH_OTHER_SIZE;
Jonathan Peyton7d454512016-01-28 23:10:44 +000093
Jonathan Peyton30419822017-05-12 18:01:32 +000094 kmp_int32 size =
95 h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
Jim Cownie4cc4bb42014-10-07 16:25:50 +000096
Jim Cownie5e8470a2013-09-27 10:38:44 +000097#if USE_FAST_MEMORY
Jonathan Peyton30419822017-05-12 18:01:32 +000098 h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
Jim Cownie5e8470a2013-09-27 10:38:44 +000099#else
Jonathan Peyton30419822017-05-12 18:01:32 +0000100 h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000101#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000102 h->size = h_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000103
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000104#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +0000105 h->nelements = 0;
106 h->nconflicts = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000107#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000108 h->buckets = (kmp_dephash_entry **)(h + 1);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000109
Jonathan Peyton30419822017-05-12 18:01:32 +0000110 for (size_t i = 0; i < h_size; i++)
111 h->buckets[i] = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000112
Jonathan Peyton30419822017-05-12 18:01:32 +0000113 return h;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000114}
115
Jonathan Peyton30419822017-05-12 18:01:32 +0000116void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h) {
117 for (size_t i = 0; i < h->size; i++) {
118 if (h->buckets[i]) {
119 kmp_dephash_entry_t *next;
120 for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
121 next = entry->next_in_bucket;
122 __kmp_depnode_list_free(thread, entry->last_ins);
123 __kmp_node_deref(thread, entry->last_out);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000124#if USE_FAST_MEMORY
Jonathan Peyton30419822017-05-12 18:01:32 +0000125 __kmp_fast_free(thread, entry);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000126#else
Jonathan Peyton30419822017-05-12 18:01:32 +0000127 __kmp_thread_free(thread, entry);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000128#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000129 }
130 h->buckets[i] = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000131 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000132 }
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +0000133}
134
// Destroy a dependence hash: release all entries, then free the table itself
// (header and bucket array share one allocation, so one free suffices).
void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
  __kmp_dephash_free_entries(thread, h);
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, h);
#else
  __kmp_thread_free(thread, h);
#endif
}
143
144static kmp_dephash_entry *
Jonathan Peyton30419822017-05-12 18:01:32 +0000145__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
146 kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000147
Jonathan Peyton30419822017-05-12 18:01:32 +0000148 kmp_dephash_entry_t *entry;
149 for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
150 if (entry->addr == addr)
151 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000152
Jonathan Peyton30419822017-05-12 18:01:32 +0000153 if (entry == NULL) {
154// create entry. This is only done by one thread so no locking required
Jim Cownie5e8470a2013-09-27 10:38:44 +0000155#if USE_FAST_MEMORY
Jonathan Peyton30419822017-05-12 18:01:32 +0000156 entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
157 thread, sizeof(kmp_dephash_entry_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000158#else
Jonathan Peyton30419822017-05-12 18:01:32 +0000159 entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
160 thread, sizeof(kmp_dephash_entry_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000161#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000162 entry->addr = addr;
163 entry->last_out = NULL;
164 entry->last_ins = NULL;
165 entry->next_in_bucket = h->buckets[bucket];
166 h->buckets[bucket] = entry;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000167#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +0000168 h->nelements++;
169 if (entry->next_in_bucket)
170 h->nconflicts++;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000171#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000172 }
173 return entry;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000174}
175
Jonathan Peyton30419822017-05-12 18:01:32 +0000176static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
177 kmp_depnode_list_t *list,
178 kmp_depnode_t *node) {
179 kmp_depnode_list_t *new_head;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000180
181#if USE_FAST_MEMORY
Jonathan Peyton30419822017-05-12 18:01:32 +0000182 new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
183 thread, sizeof(kmp_depnode_list_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000184#else
Jonathan Peyton30419822017-05-12 18:01:32 +0000185 new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
186 thread, sizeof(kmp_depnode_list_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000187#endif
188
Jonathan Peyton30419822017-05-12 18:01:32 +0000189 new_head->node = __kmp_node_ref(node);
190 new_head->next = list;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000191
Jonathan Peyton30419822017-05-12 18:01:32 +0000192 return new_head;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000193}
194
Jonathan Peyton30419822017-05-12 18:01:32 +0000195static void __kmp_depnode_list_free(kmp_info_t *thread,
196 kmp_depnode_list *list) {
197 kmp_depnode_list *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000198
Jonathan Peyton30419822017-05-12 18:01:32 +0000199 for (; list; list = next) {
200 next = list->next;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000201
Jonathan Peyton30419822017-05-12 18:01:32 +0000202 __kmp_node_deref(thread, list->node);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000203#if USE_FAST_MEMORY
Jonathan Peyton30419822017-05-12 18:01:32 +0000204 __kmp_fast_free(thread, list);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000205#else
Jonathan Peyton30419822017-05-12 18:01:32 +0000206 __kmp_thread_free(thread, list);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000207#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000208 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000209}
210
// Report to tooling that 'sink' depends on 'source': prints a graph edge when
// KMP_SUPPORT_GRAPH_OUTPUT is enabled, and fires the OMPT task-dependence
// callback when a tool registered for it. Compiles to a no-op otherwise.
static inline void __kmp_track_dependence(kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // do not use sink->dn.task as that is only filled after the dependencies
  // are already processed!
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT tracks dependences between task (a=source, b=sink) in which
     task a blocks the execution of b through the ompt_new_dependence_callback
     */
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data),
        &(task_sink->ompt_task_info.task_data));
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
238
// Process one dependence list of a new task: link the task's depnode as a
// successor of every unfinished predecessor recorded in 'hash', and update
// each address's last_out/last_ins history. Returns the number of
// predecessors the new task must wait for. Instantiated with filter=true for
// the possibly-aliased list (entries voided by __kmp_check_deps's duplicate
// filter are skipped) and filter=false for the no-alias list.
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
                   bool dep_barrier, kmp_int32 ndeps,
                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];

    KMP_DEBUG_ASSERT(dep->flags.in);

    if (filter && dep->base_addr == 0)
      continue; // skip filtered entries

    kmp_dephash_entry_t *info =
        __kmp_dephash_find(thread, hash, dep->base_addr);
    kmp_depnode_t *last_out = info->last_out;

    if (dep->flags.out && info->last_ins) {
      // out/inout after reads: this task must wait for every recorded reader.
      for (kmp_depnode_list_t *p = info->last_ins; p; p = p->next) {
        kmp_depnode_t *indep = p->node;
        // Unlocked check first to skip finished tasks cheaply, then re-check
        // under the depnode lock: the predecessor may finish concurrently,
        // and dn.task==NULL under the lock means it can no longer queue us.
        if (indep->dn.task) {
          KMP_ACQUIRE_DEPNODE(gtid, indep);
          if (indep->dn.task) {
            __kmp_track_dependence(indep, node, task);
            indep->dn.successors =
                __kmp_add_node(thread, indep->dn.successors, node);
            KA_TRACE(40, ("__kmp_process_deps<%d>: T#%d adding dependence from "
                          "%p to %p\n",
                          filter, gtid, KMP_TASK_TO_TASKDATA(indep->dn.task),
                          KMP_TASK_TO_TASKDATA(task)));
            npredecessors++;
          }
          KMP_RELEASE_DEPNODE(gtid, indep);
        }
      }

      // The reader list is superseded by this writer.
      __kmp_depnode_list_free(thread, info->last_ins);
      info->last_ins = NULL;

    } else if (last_out && last_out->dn.task) {
      // Depend on the previous writer (same double-checked locking pattern).
      KMP_ACQUIRE_DEPNODE(gtid, last_out);
      if (last_out->dn.task) {
        __kmp_track_dependence(last_out, node, task);
        last_out->dn.successors =
            __kmp_add_node(thread, last_out->dn.successors, node);
        KA_TRACE(
            40,
            ("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
             filter, gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task),
             KMP_TASK_TO_TASKDATA(task)));

        npredecessors++;
      }
      KMP_RELEASE_DEPNODE(gtid, last_out);
    }

    if (dep_barrier) {
      // if this is a sync point in the serial sequence, then the previous
      // outputs are guaranteed to be completed after
      // the execution of this task so the previous output nodes can be cleared.
      __kmp_node_deref(thread, last_out);
      info->last_out = NULL;
    } else {
      if (dep->flags.out) {
        // This task becomes the new last writer for the address.
        __kmp_node_deref(thread, last_out);
        info->last_out = __kmp_node_ref(node);
      } else
        // Pure read: join the set of concurrent readers.
        info->last_ins = __kmp_add_node(thread, info->last_ins, node);
    }
  }

  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));

  return npredecessors;
}
321
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)

// returns true if the task has any outstanding dependence
// (i.e. the task must not be queued yet; a releasing predecessor will queue
// it later). 'task' may be NULL for a wait-deps barrier node.
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t *hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i;

#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d "
                "possibly aliased dependencies, %d non-aliased depedencies : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));

  // Filter deps in dep_list: merge duplicate addresses into the first
  // occurrence (OR-ing their in/out flags) and void the duplicates so
  // __kmp_process_deps<true> skips them.
  // TODO: Different algorithm for large dep_list ( > 10 ? )
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0)
      for (int j = i + 1; j < ndeps; j++)
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          dep_list[i].flags.in |= dep_list[j].flags.in;
          dep_list[i].flags.out |= dep_list[j].flags.out;
          dep_list[j].base_addr = 0; // Mark j element as void
        }
  }

  // doesn't need to be atomic as no other thread is going to be accessing this
  // node just yet.
  // npredecessors is set -1 to ensure that none of the releasing tasks queues
  // this task before we have finished processing all the dependencies
  node->dn.npredecessors = -1;

  // used to pack all npredecessors additions into a single atomic operation at
  // the end
  int npredecessors;

  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
                                           dep_list, task);
  npredecessors += __kmp_process_deps<false>(
      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);

  // Publish the task pointer only after all dependences are linked; the
  // memory barrier orders the store against the fetch_add below.
  node->dn.task = task;
  KMP_MB();

  // Account for our initial fake value
  npredecessors++;

  // Update predecessors and obtain current value to check if there are still
  // any outstanding dependences (some tasks may have finished while we
  // processed the dependences)
  npredecessors =
      node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;

  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));

  // beyond this point the task could be queued (and executed) by a releasing
  // task...
  return npredecessors > 0 ? true : false;
}
388
// Called when 'task' completes: free its dependence hash (if any), mark its
// depnode finished, decrement each successor's predecessor count, and queue
// any successor whose count reaches zero.
void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_depnode_t *node = task->td_depnode;

  if (task->td_dephash) {
    KA_TRACE(
        40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n",
             gtid, task));
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }

  // No depnode means no task ever registered a dependence on this one.
  if (!node)
    return;

  KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n",
                gtid, task));

  // Taking the lock here synchronizes with __kmp_process_deps, which
  // re-checks dn.task under the same lock before adding a successor.
  KMP_ACQUIRE_DEPNODE(gtid, node);
  node->dn.task =
      NULL; // mark this task as finished, so no new dependencies are generated
  KMP_RELEASE_DEPNODE(gtid, node);

  kmp_depnode_list_t *next;
  for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) {
    kmp_depnode_t *successor = p->node;
    // KMP_ATOMIC_DEC returns the previous value; subtract 1 for the new one.
    kmp_int32 npredecessors = KMP_ATOMIC_DEC(&successor->dn.npredecessors) - 1;

    // successor task can be NULL for wait_depends or because deps are still
    // being processed
    if (npredecessors == 0) {
      KMP_MB();
      if (successor->dn.task) {
        KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled "
                      "for execution.\n",
                      gtid, successor->dn.task, task));
        __kmp_omp_task(gtid, successor->dn.task, false);
      }
    }

    // Free the successor-list cell and drop its reference on the node.
    next = p->next;
    __kmp_node_deref(thread, p->node);
#if USE_FAST_MEMORY
    __kmp_fast_free(thread, p);
#else
    __kmp_thread_free(thread, p);
#endif
  }

  // Drop this task's own reference on its depnode.
  __kmp_node_deref(thread, node);

  KA_TRACE(
      20,
      ("__kmp_release_deps: T#%d all successors of %p notified of completion\n",
       gtid, task));
}
445
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
task''
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued

Schedule a non-thread-switchable task with dependences for execution
*/
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {

  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPT_SUPPORT
  // Notify an attached OMPT tool of the task creation event.
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    if (!current_task->ompt_task_info.frame.enter_frame)
      current_task->ompt_task_info.frame.enter_frame =
          OMPT_GET_FRAME_ADDRESS(1);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_data_t task_data = ompt_data_none;
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          current_task ? &(current_task->ompt_task_info.task_data) : &task_data,
          current_task ? &(current_task->ompt_task_info.frame) : NULL,
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(0);
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 &&
      ompt_enabled.ompt_callback_task_dependences) {
    kmp_int32 i;

    new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
    new_taskdata->ompt_task_info.deps =
        (ompt_task_dependence_t *)KMP_OMPT_DEPS_ALLOC(
            thread, (ndeps + ndeps_noalias) * sizeof(ompt_task_dependence_t));

    KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);

    // Translate the runtime's in/out flag pairs into OMPT dependence types:
    // in+out -> inout, out -> out, in -> in.
    for (i = 0; i < ndeps; i++) {
      new_taskdata->ompt_task_info.deps[i].variable_addr =
          (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[i].dependence_flags =
            ompt_task_dependence_type_inout;
      else if (dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[i].dependence_flags =
            ompt_task_dependence_type_out;
      else if (dep_list[i].flags.in)
        new_taskdata->ompt_task_info.deps[i].dependence_flags =
            ompt_task_dependence_type_in;
    }
    // No-alias entries are appended after the first ndeps slots.
    for (i = 0; i < ndeps_noalias; i++) {
      new_taskdata->ompt_task_info.deps[ndeps + i].variable_addr =
          (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
            ompt_task_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
            ompt_task_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
            ompt_task_dependence_type_in;
    }
    ompt_callbacks.ompt_callback(ompt_callback_task_dependences)(
        &(new_taskdata->ompt_task_info.task_data),
        new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps);
    /* We can now free the allocated memory for the dependencies */
    /* For OMPD we might want to delay the free until task_end */
    KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps);
    new_taskdata->ompt_task_info.deps = NULL;
    new_taskdata->ompt_task_info.ndeps = 0;
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  // Serialized execution means dependences are trivially satisfied...
  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
#if OMP_45_ENABLED
  // ...except when proxy tasks were found: those may complete asynchronously,
  // so dependence tracking is still required.
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks);
#endif

  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependencies have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;

    if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      // Outstanding predecessors: a releasing task will queue this one later.
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependencies: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = NULL;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies "
                  "for task (serialized)"
                  "loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependencies : "
                "loc=%p task=%p, transferring to __kmpc_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = NULL;
  }
#endif
  return ret;
}
603
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

Blocks the current task until all specified dependencies have been fulfilled.
*/
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list) {
  KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));

  // Nothing to wait for.
  if (ndeps == 0 && ndeps_noalias == 0) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to "
                  "wait upon : loc=%p\n",
                  gtid, loc_ref));
    return;
  }

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  // We can return immediately as:
  // - dependences are not computed in serial teams (except with proxy tasks)
  // - if the dephash is not yet created it means we have nothing to wait for
  bool ignore = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
#if OMP_45_ENABLED
  ignore = ignore && thread->th.th_task_team != NULL &&
           thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
#endif
  ignore = ignore || current_task->td_dephash == NULL;

  if (ignore) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
                  "dependencies : loc=%p\n",
                  gtid, loc_ref));
    return;
  }

  // A stack-allocated barrier node with no task attached (task == NULL);
  // DEP_BARRIER makes __kmp_process_deps clear the recorded outputs.
  kmp_depnode_t node = {0};
  __kmp_init_node(&node);

  if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash,
                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                        noalias_dep_list)) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
                  "dependencies : loc=%p\n",
                  gtid, loc_ref));
    return;
  }

  // Execute other tasks until all predecessors have released this node.
  int thread_finished = FALSE;
  kmp_flag_32 flag((std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
  while (node.dn.npredecessors > 0) {
    flag.execute_tasks(thread, gtid, FALSE,
                       &thread_finished USE_ITT_BUILD_ARG(NULL),
                       __kmp_task_stealing_constraint);
  }

  KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
                gtid, loc_ref));
}
672
673#endif /* OMP_40_ENABLED */