/*
 * kmp_taskq.cpp -- TASKQ support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"

#define MAX_MESSAGE 512

/* Taskq routines and global variables */

#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);

#define THREAD_ALLOC_FOR_TASKQ

static int in_parallel_context(kmp_team_t *team) {
  return !team->t.t_serialized;
}

static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
#if KMP_USE_DYNAMIC_LOCK
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0);
#else
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL);
#endif

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* GEH - need check here under stats to make sure   */
    /*       inside task (curr_thunk[*tid_ref] != NULL) */

    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;

    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
    KMP_MB();
  }
}

static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref);

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* GEH - need check here under stats to make sure */
    /*       inside task (curr_thunk[tid] != NULL)    */

    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    KMP_MB(); /* Flush all pending memory write invalidates. */

    tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving =
        my_token + 1;

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
}

static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) {
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;

  /* assume we are always called from an active parallel context */

  KMP_MB(); /* Flush all pending memory write invalidates. */

  my_token = thunk->th_tasknum;

  taskq = thunk->th.th_shareds->sv_queue;

  if (taskq->tq_tasknum_serving <= my_token) {
    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
    KMP_MB();
    taskq->tq_tasknum_serving = my_token + 1;
    KMP_MB();
  }
}
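
/* Note (illustrative summary, not additional runtime code): the three routines
   above implement a ticket scheme for ORDERED sections inside a TASKQ.  Each
   task carries a ticket in th_tasknum and tq_tasknum_serving says whose turn
   it is, roughly:

       enter (__kmp_taskq_eo):  wait until tq_tasknum_serving == th_tasknum
       leave (__kmp_taskq_xo):  tq_tasknum_serving = th_tasknum + 1

   __kmp_taskq_check_ordered() runs after an ORDERED task finishes (see
   __kmp_execute_task_from_queue below) and makes sure the serving counter has
   advanced past that task's ticket even if its ORDERED section was never
   entered. */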

#ifdef KMP_DEBUG

static void __kmp_dump_TQF(kmp_int32 flags) {
  if (flags & TQF_IS_ORDERED)
    __kmp_printf("ORDERED ");
  if (flags & TQF_IS_LASTPRIVATE)
    __kmp_printf("LAST_PRIV ");
  if (flags & TQF_IS_NOWAIT)
    __kmp_printf("NOWAIT ");
  if (flags & TQF_HEURISTICS)
    __kmp_printf("HEURIST ");
  if (flags & TQF_INTERFACE_RESERVED1)
    __kmp_printf("RESERV1 ");
  if (flags & TQF_INTERFACE_RESERVED2)
    __kmp_printf("RESERV2 ");
  if (flags & TQF_INTERFACE_RESERVED3)
    __kmp_printf("RESERV3 ");
  if (flags & TQF_INTERFACE_RESERVED4)
    __kmp_printf("RESERV4 ");
  if (flags & TQF_IS_LAST_TASK)
    __kmp_printf("LAST_TASK ");
  if (flags & TQF_TASKQ_TASK)
    __kmp_printf("TASKQ_TASK ");
  if (flags & TQF_RELEASE_WORKERS)
    __kmp_printf("RELEASE ");
  if (flags & TQF_ALL_TASKS_QUEUED)
    __kmp_printf("ALL_QUEUED ");
  if (flags & TQF_PARALLEL_CONTEXT)
    __kmp_printf("PARALLEL ");
  if (flags & TQF_DEALLOCATED)
    __kmp_printf("DEALLOC ");
  if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS)))
    __kmp_printf("(NONE)");
}

static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk,
                             kmp_int32 global_tid) {
  int i;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;

  __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);

  if (thunk != NULL) {
    for (i = 0; i < nproc; i++) {
      if (tq->tq_curr_thunk[i] == thunk) {
        __kmp_printf("[%i] ", i);
      }
    }
    __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
    __kmp_printf("th_task=%p, ", thunk->th_task);
    __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
    __kmp_printf("th_status=%d, ", thunk->th_status);
    __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
    __kmp_printf("th_flags=");
    __kmp_dump_TQF(thunk->th_flags);
  }

  __kmp_printf("\n");
}

static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) {
  kmpc_thunk_t *th;

  __kmp_printf(" Thunk stack for T#%d: ", thread_num);

  for (th = thunk; th != NULL; th = th->th_encl_thunk)
    __kmp_printf("%p ", th);

  __kmp_printf("\n");
}

static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue,
                                  kmp_int32 global_tid) {
  int qs, count, i;
  kmpc_thunk_t *thunk;
  kmpc_task_queue_t *taskq;

  __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

  if (queue != NULL) {
    int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

    if (__kmp_env_consistency_check) {
      __kmp_printf(" tq_loc : ");
    }
    if (in_parallel) {

      // if (queue->tq.tq_parent != 0)
      //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

      //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);

      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent);
      __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child);
      __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child);
      __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child);
      __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count);

      //__kmp_release_lock(& queue->tq_link_lck, global_tid);

      // if (queue->tq.tq_parent != 0)
      //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

      //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
      //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();
    }

    __kmp_printf(" tq_shareds : ");
    for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
      __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
    __kmp_printf("\n");

    if (in_parallel) {
      __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
      __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
    }

    __kmp_printf(" tq_queue : %p\n", queue->tq_queue);
    __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space);
    __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot);

    __kmp_printf(" tq_free_thunks : ");
    for (thunk = queue->tq_free_thunks; thunk != NULL;
         thunk = thunk->th.th_next_free)
      __kmp_printf("%p ", thunk);
    __kmp_printf("\n");

    __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots);
    __kmp_printf(" tq_head : %d\n", queue->tq_head);
    __kmp_printf(" tq_tail : %d\n", queue->tq_tail);
    __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull);
    __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat);
    __kmp_printf(" tq_flags : ");
    __kmp_dump_TQF(queue->tq_flags);
    __kmp_printf("\n");

    if (in_parallel) {
      __kmp_printf(" tq_th_thunks : ");
      for (i = 0; i < queue->tq_nproc; i++) {
        __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
      }
      __kmp_printf("\n");
    }

    __kmp_printf("\n");
    __kmp_printf(" Queue slots:\n");

    qs = queue->tq_tail;
    for (count = 0; count < queue->tq_nfull; ++count) {
      __kmp_printf("(%d)", qs);
      __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid);
      qs = (qs + 1) % queue->tq_nslots;
    }

    __kmp_printf("\n");

    if (in_parallel) {
      if (queue->tq_taskq_slot != NULL) {
        __kmp_printf(" TaskQ slot:\n");
        __kmp_dump_thunk(tq, CCAST(kmpc_thunk_t *, queue->tq_taskq_slot),
                         global_tid);
        __kmp_printf("\n");
      }
      //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
      //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
    }
  }

  __kmp_printf(" Taskq freelist: ");

  //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free)
    __kmp_printf("%p ", taskq);

  //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );

  __kmp_printf("\n\n");
}

static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq,
                                           kmpc_task_queue_t *curr_queue,
                                           kmp_int32 level,
                                           kmp_int32 global_tid) {
  int i, count, qs;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue == NULL)
    return;

  __kmp_printf(" ");

  for (i = 0; i < level; i++)
    __kmp_printf(" ");

  __kmp_printf("%p", curr_queue);

  for (i = 0; i < nproc; i++) {
    if (tq->tq_curr_thunk[i] &&
        tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) {
      __kmp_printf(" [%i]", i);
    }
  }

  __kmp_printf(":");

  //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  qs = curr_queue->tq_tail;

  for (count = 0; count < curr_queue->tq_nfull; ++count) {
    __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
    qs = (qs + 1) % curr_queue->tq_nslots;
  }

  //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);

  __kmp_printf("\n");

  if (curr_queue->tq_first_child) {
    //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();

    if (curr_queue->tq_first_child) {
      for (queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
           queue != NULL; queue = queue->tq_next_child) {
        __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid);
      }
    }

    //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
  }
}

static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq,
                                       kmpc_task_queue_t *tqroot,
                                       kmp_int32 global_tid) {
  __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

  __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid);

  __kmp_printf("\n");
}
#endif

/* New taskq storage routines that try to minimize overhead of mallocs but
   still provide cache line alignment. */
static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) {
  void *addr, *orig_addr;
  size_t bytes;

  KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size,
               global_tid));

  bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
  orig_addr =
      (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes);
#else
  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes));
  orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes);
#endif /* THREAD_ALLOC_FOR_TASKQ */

  if (orig_addr == 0)
    KMP_FATAL(OutOfHeapMemory);

  addr = orig_addr;

  if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) {
    KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n"));
    addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1));
  }

  (*(void **)addr) = orig_addr;

  KB_TRACE(10,
           ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, "
            "gtid: %d\n",
            orig_addr, ((void **)addr) + 1,
            ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid));

  return (((void **)addr) + 1);
}
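
/* Worked example of the alignment arithmetic above (illustrative only,
   assuming CACHE_LINE == 64 and 64-bit pointers): for size == 100 the request
   is 8 + 64 + 100 bytes.  If the underlying allocator returns
   orig_addr == 0x1008, then
       addr = (0x1008 + 0x40) & ~0x3F = 0x1040,
   orig_addr is stashed at 0x1040, and the caller receives 0x1048
   (= ((void **)addr) + 1).  __kmpc_taskq_free() below reads the word just
   before the pointer it is handed to recover orig_addr for the real free. */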

static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) {
  KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid));

  KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n",
                (*(((void **)p) - 1)), global_tid));

#ifdef THREAD_ALLOC_FOR_TASKQ
  __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1));
#else
  KMP_INTERNAL_FREE(*(((void **)p) - 1));
#endif /* THREAD_ALLOC_FOR_TASKQ */
}

/* Keep freed kmpc_task_queue_t on an internal freelist and recycle since
   they're of constant size. */

static kmpc_task_queue_t *
__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots,
                  kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc,
                  size_t sizeof_thunk, size_t sizeof_shareds,
                  kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) {
  kmp_int32 i;
  size_t bytes;
  kmpc_task_queue_t *new_queue;
  kmpc_aligned_shared_vars_t *shared_var_array;
  char *shared_var_storage;
  char *pt; /* for doing byte-adjusted address computations */

  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  if (tq->tq_freelist) {
    new_queue = tq->tq_freelist;
    tq->tq_freelist = tq->tq_freelist->tq.tq_next_free;

    KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

    new_queue->tq_flags = 0;

    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
  } else {
    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);

    new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate(
        sizeof(kmpc_task_queue_t), global_tid);
    new_queue->tq_flags = 0;
  }

  /* space in the task queue for queue slots (allocate as one big chunk */
  /* of storage including new_taskq_task space)                         */

  sizeof_thunk +=
      (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */
  pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid);
  new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
  *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

  /* chain the allocated thunks into a freelist for this queue */

  new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

  for (i = 0; i < (nthunks - 2); i++) {
    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free =
        (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk);
#ifdef KMP_DEBUG
    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED;
#endif
  }

  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL;
#ifdef KMP_DEBUG
  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags =
      TQF_DEALLOCATED;
#endif

  /* initialize the locks */

  if (in_parallel) {
    __kmp_init_lock(&new_queue->tq_link_lck);
    __kmp_init_lock(&new_queue->tq_free_thunks_lck);
    __kmp_init_lock(&new_queue->tq_queue_lck);
  }

  /* now allocate the slots */

  bytes = nslots * sizeof(kmpc_aligned_queue_slot_t);
  new_queue->tq_queue =
      (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid);

  /* space for array of pointers to shared variable structures */
  sizeof_shareds += sizeof(kmpc_task_queue_t *);
  sizeof_shareds +=
      (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */

  bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t);
  shared_var_array =
      (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid);

  bytes = nshareds * sizeof_shareds;
  shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid);

  for (i = 0; i < nshareds; i++) {
    shared_var_array[i].ai_data =
        (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds);
    shared_var_array[i].ai_data->sv_queue = new_queue;
  }
  new_queue->tq_shareds = shared_var_array;

  /* array for number of outstanding thunks per thread */

  if (in_parallel) {
    bytes = nproc * sizeof(kmpc_aligned_int32_t);
    new_queue->tq_th_thunks =
        (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid);
    new_queue->tq_nproc = nproc;

    for (i = 0; i < nproc; i++)
      new_queue->tq_th_thunks[i].ai_data = 0;
  }

  return new_queue;
}
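
/* Layout sketch for the thunk block built above (illustrative only).  For
   nthunks == N, a single allocation of N * sizeof_thunk bytes is carved into
   N thunks:

       [ thunk 0 ][ thunk 1 ] ... [ thunk N-2 ][ thunk N-1 ]

       freelist:  0 -> 1 -> ... -> N-2 -> NULL   (tq_free_thunks)
       thunk N-1: returned separately as *new_taskq_thunk

   Thunks 0 .. N-2 form the per-queue freelist handed out by
   __kmp_alloc_thunk(); the last thunk is reserved for the taskq (dispatcher)
   task and never appears on the freelist. */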

static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p,
                             int in_parallel, kmp_int32 global_tid) {
  __kmpc_taskq_free(p->tq_thunk_space, global_tid);
  __kmpc_taskq_free(p->tq_queue, global_tid);

  /* free shared var structure storage */
  __kmpc_taskq_free(CCAST(kmpc_shared_vars_t *, p->tq_shareds[0].ai_data),
                    global_tid);
  /* free array of pointers to shared vars storage */
  __kmpc_taskq_free(p->tq_shareds, global_tid);

#ifdef KMP_DEBUG
  p->tq_first_child = NULL;
  p->tq_next_child = NULL;
  p->tq_prev_child = NULL;
  p->tq_ref_count = -10;
  p->tq_shareds = NULL;
  p->tq_tasknum_queuing = 0;
  p->tq_tasknum_serving = 0;
  p->tq_queue = NULL;
  p->tq_thunk_space = NULL;
  p->tq_taskq_slot = NULL;
  p->tq_free_thunks = NULL;
  p->tq_nslots = 0;
  p->tq_head = 0;
  p->tq_tail = 0;
  p->tq_nfull = 0;
  p->tq_hiwat = 0;

  if (in_parallel) {
    int i;

    for (i = 0; i < p->tq_nproc; i++)
      p->tq_th_thunks[i].ai_data = 0;
  }
  if (__kmp_env_consistency_check)
    p->tq_loc = NULL;
  KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED);
  p->tq_flags = TQF_DEALLOCATED;
#endif /* KMP_DEBUG */

  if (in_parallel) {
    __kmpc_taskq_free(p->tq_th_thunks, global_tid);
    __kmp_destroy_lock(&p->tq_link_lck);
    __kmp_destroy_lock(&p->tq_queue_lck);
    __kmp_destroy_lock(&p->tq_free_thunks_lck);
  }
#ifdef KMP_DEBUG
  p->tq_th_thunks = NULL;
#endif /* KMP_DEBUG */

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
  p->tq.tq_next_free = tq->tq_freelist;

  tq->tq_freelist = p;
  __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
}

/* Once a group of thunks has been allocated for use in a particular queue,
   these are managed via a per-queue freelist.
   We force a check that there's always a thunk free if we need one. */

static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue,
                                       int in_parallel, kmp_int32 global_tid) {
  kmpc_thunk_t *fl;

  if (in_parallel) {
    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();
  }

  fl = queue->tq_free_thunks;

  KMP_DEBUG_ASSERT(fl != NULL);

  queue->tq_free_thunks = fl->th.th_next_free;
  fl->th_flags = 0;

  if (in_parallel)
    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);

  return fl;
}

static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p,
                             int in_parallel, kmp_int32 global_tid) {
#ifdef KMP_DEBUG
  p->th_task = 0;
  p->th_encl_thunk = 0;
  p->th_status = 0;
  p->th_tasknum = 0;
/* Also could zero pointers to private vars */
#endif

  if (in_parallel) {
    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();
  }

  p->th.th_next_free = queue->tq_free_thunks;
  queue->tq_free_thunks = p;

#ifdef KMP_DEBUG
  p->th_flags = TQF_DEALLOCATED;
#endif

  if (in_parallel)
    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
}

/* returns nonzero if the queue just became full after the enqueue */
static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid,
                                    kmpc_task_queue_t *queue,
                                    kmpc_thunk_t *thunk, int in_parallel) {
  kmp_int32 ret;

  /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the
   * master is executing then) */
  if (in_parallel) {
    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();
  }

  KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots); // check queue not full

  queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

  if (queue->tq_head >= queue->tq_nslots)
    queue->tq_head = 0;

  (queue->tq_nfull)++;

  KMP_MB(); /* to assure that nfull is seen to increase before
               TQF_ALL_TASKS_QUEUED is set */

  ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

  if (in_parallel) {
    /* don't need to wait until workers are released before unlocking */
    __kmp_release_lock(&queue->tq_queue_lck, global_tid);

    if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
      // If just creating the root queue, the worker threads are waiting at a
      // join barrier until now, when there's something in the queue for them
      // to do; release them now to do work. This should only be done when this
      // is the first task enqueued, so reset the flag here also.
      tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers
                                                      are still in spin mode */
      // avoid releasing barrier twice if taskq_task switches threads
      KMP_MB();

      __kmpc_end_barrier_master(NULL, global_tid);
    }
  }

  return ret;
}
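
/* Illustrative state of the circular buffer maintained by the enqueue/dequeue
   pair (no additional code).  With tq_nslots == 4, after enqueuing thunks A,
   B, C and then dequeuing A:

       tq_queue : [ A ][ B ][ C ][ - ]
       tq_tail  : 1   (next slot to dequeue, currently B)
       tq_head  : 3   (next slot to fill)
       tq_nfull : 2

   Both indices wrap modulo tq_nslots; "full" (== tq_nslots) and "empty"
   (== 0) are judged from tq_nfull, not from the head/tail distance. */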

static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid,
                                        kmpc_task_queue_t *queue,
                                        int in_parallel) {
  kmpc_thunk_t *pt;
  int tid = __kmp_tid_from_gtid(global_tid);

  KMP_DEBUG_ASSERT(queue->tq_nfull > 0); /* check queue not empty */

  if (queue->tq.tq_parent != NULL && in_parallel) {
    int ct;
    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    ct = ++(queue->tq_ref_count);
    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    KMP_DEBUG_REF_CTS(
        ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
  }

  pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

  if (queue->tq_tail >= queue->tq_nslots)
    queue->tq_tail = 0;

  if (in_parallel) {
    queue->tq_th_thunks[tid].ai_data++;

    KMP_MB(); /* necessary so ai_data increment is propagated to other threads
                 immediately (digital) */

    KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding "
                   "thunks from queue %p\n",
                   global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
  }

  (queue->tq_nfull)--;

#ifdef KMP_DEBUG
  KMP_MB();

  /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is
   * decremented */

  KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

  if (in_parallel) {
    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <=
                     __KMP_TASKQ_THUNKS_PER_TH);
  }
#endif

  return pt;
}

/* Find the next (non-null) task to dequeue and return it.
 * This is never called unless in_parallel=TRUE
 *
 * Here are the rules for deciding which queue to take the task from:
 * 1.  Walk up the task queue tree from the current queue's parent and look
 *     on the way up (for loop, below).
 * 2.  Do a depth-first search back down the tree from the root and
 *     look (find_task_in_descendant_queue()).
 *
 * Here are the rules for deciding which task to take from a queue
 * (__kmp_find_task_in_queue ()):
 * 1.  Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task
 *     must be staged to make sure we execute the last one with
 *     TQF_IS_LAST_TASK at the end of task queue execution.
 * 2.  If the queue length is below some high water mark and the taskq task
 *     is enqueued, prefer running the taskq task.
 * 3.  Otherwise, take a (normal) task from the queue.
 *
 * If we do all this and return pt == NULL at the bottom of this routine,
 * this means there are no more tasks to execute (except possibly for
 * TQF_IS_LASTPRIVATE).
 */

static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid,
                                              kmpc_task_queue_t *queue) {
  kmpc_thunk_t *pt = NULL;
  int tid = __kmp_tid_from_gtid(global_tid);

  /* To prevent deadlock from tq_queue_lck if queue already deallocated */
  if (!(queue->tq_flags & TQF_DEALLOCATED)) {

    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

    /* Check again to avoid race in __kmpc_end_taskq() */
    if (!(queue->tq_flags & TQF_DEALLOCATED)) {
      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      if ((queue->tq_taskq_slot != NULL) &&
          (queue->tq_nfull <= queue->tq_hiwat)) {
        /* if there's enough room in the queue and the dispatcher */
        /* (taskq task) is available, schedule more tasks         */
        pt = CCAST(kmpc_thunk_t *, queue->tq_taskq_slot);
        queue->tq_taskq_slot = NULL;
      } else if (queue->tq_nfull == 0 ||
                 queue->tq_th_thunks[tid].ai_data >=
                     __KMP_TASKQ_THUNKS_PER_TH) {
        /* do nothing if no thunks available or this thread can't */
        /* run any because it already is executing too many       */
        pt = NULL;
      } else if (queue->tq_nfull > 1) {
        /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */

        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
        // one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (queue->tq_flags & TQF_IS_LAST_TASK) {
        /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task()   */
        /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
        /* instrumentation does copy-out.                                  */
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
        pt->th_flags |=
            TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */
      }
    }

    /* GEH - What happens here if is lastprivate, but not last task? */
    __kmp_release_lock(&queue->tq_queue_lck, global_tid);
  }

  return pt;
}

/* Walk a tree of queues starting at queue's first child and return a non-NULL
   thunk if one can be scheduled. Must only be called when in_parallel=TRUE */

static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue(kmp_int32 global_tid,
                                    kmpc_task_queue_t *curr_queue) {
  kmpc_thunk_t *pt = NULL;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue->tq_first_child != NULL) {
    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();

    queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
    if (queue == NULL) {
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      return NULL;
    }

    while (queue != NULL) {
      int ct;
      kmpc_task_queue_t *next;

      ct = ++(queue->tq_ref_count);
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

      pt = __kmp_find_task_in_queue(global_tid, queue);

      if (pt != NULL) {
        int ct;

        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
        // Make sure data structures in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      /* although reference count stays active during descendant walk, shouldn't
         matter since if children still exist, reference counts aren't being
         monitored anyway */

      pt = __kmp_find_task_in_descendant_queue(global_tid, queue);

      if (pt != NULL) {
        int ct;

        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
        // Make sure data structures in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(ct >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
      // Make sure data structures in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      next = queue->tq_next_child;

      ct = --(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);

      queue = next;
    }

    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
  }

  return pt;
}

/* Walk up the taskq tree looking for a task to execute. If we get to the root,
   search the tree for a descendant queue task. Must only be called when
   in_parallel=TRUE */
static kmpc_thunk_t *
__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid,
                                  kmpc_task_queue_t *curr_queue) {
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *pt;

  pt = NULL;

  if (curr_queue->tq.tq_parent != NULL) {
    queue = curr_queue->tq.tq_parent;

    while (queue != NULL) {
      if (queue->tq.tq_parent != NULL) {
        int ct;
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        // Make sure data structures in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        ct = ++(queue->tq_ref_count);
        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__,
                           global_tid, queue, ct));
      }

      pt = __kmp_find_task_in_queue(global_tid, queue);
      if (pt != NULL) {
        if (queue->tq.tq_parent != NULL) {
          int ct;
          __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
          // Make sure data structures in consistent state before querying them
          // Seems to work without this for digital/alpha, needed for IBM/RS6000
          KMP_MB();

          ct = --(queue->tq_ref_count);
          KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                             global_tid, queue, ct));
          KMP_DEBUG_ASSERT(ct >= 0);

          __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        }

        return pt;
      }

      if (queue->tq.tq_parent != NULL) {
        int ct;
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        // Make sure data structures in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(ct >= 0);
      }
      queue = queue->tq.tq_parent;

      if (queue != NULL)
        __kmp_release_lock(&queue->tq_link_lck, global_tid);
    }
  }

  pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root);

  return pt;
}

static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) {
  int i;

  /* KMP_MB(); */ /* is this really necessary? */

  for (i = 0; i < queue->tq_nproc; i++) {
    if (queue->tq_th_thunks[i].ai_data != 0)
      return FALSE;
  }

  return TRUE;
}

static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) {
  return (queue->tq_first_child != NULL);
}

static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid,
                                         kmpc_task_queue_t *queue,
                                         int in_parallel) {
#ifdef KMP_DEBUG
  kmp_int32 i;
  kmpc_thunk_t *thunk;
#endif

  KF_TRACE(50,
           ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
  KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid));

  /* sub-queue in a recursion, not the root task queue */
  KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL);

  if (in_parallel) {
    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();
  }

  KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);

  /* unlink queue from its siblings if any at this level */
  if (queue->tq_prev_child != NULL)
    queue->tq_prev_child->tq_next_child = queue->tq_next_child;
  if (queue->tq_next_child != NULL)
    queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
  if (queue->tq.tq_parent->tq_first_child == queue)
    queue->tq.tq_parent->tq_first_child = queue->tq_next_child;

  queue->tq_prev_child = NULL;
  queue->tq_next_child = NULL;

  if (in_parallel) {
    KMP_DEBUG_REF_CTS(
        ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
         __LINE__, global_tid, queue, queue->tq_ref_count));

    /* wait until all other threads have stopped accessing this queue */
    while (queue->tq_ref_count > 1) {
      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

      KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
                     NULL);

      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();
    }

    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
  }

  KMP_DEBUG_REF_CTS(
      ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue));

#ifdef KMP_DEBUG
  KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
  KMP_DEBUG_ASSERT(queue->tq_nfull == 0);

  for (i = 0; i < queue->tq_nproc; i++) {
    KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
  }

  i = 0;
  for (thunk = queue->tq_free_thunks; thunk != NULL;
       thunk = thunk->th.th_next_free)
    ++i;

  KMP_ASSERT(i ==
             queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
#endif

  /* release storage for queue entry */
  __kmp_free_taskq(tq, queue, TRUE, global_tid);

  KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
  KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
}
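
/* Reference-counting summary for the removal above (descriptive only): any
   thread that wants to keep using a child queue after dropping the parent's
   tq_link_lck first increments tq_ref_count under that lock (see
   __kmp_dequeue_task() and the descendant/ancestor searches) and decrements
   it when done.  The wait loop in __kmp_remove_queue_from_tree() therefore
   spins until the count drains back to 1, i.e. until no other thread still
   holds a temporary reference, before the queue storage is recycled. */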

/* Starting from indicated queue, proceed downward through tree and remove all
   taskqs which are finished, but only go down to taskqs which have the "nowait"
   clause present. Assume this is only called when in_parallel=TRUE. */

static void __kmp_find_and_remove_finished_child_taskq(
    kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) {
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue->tq_first_child != NULL) {
    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();

    queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
    if (queue == NULL) {
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      return;
    }

    while (queue != NULL) {
      kmpc_task_queue_t *next;
      int ct = ++(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

      /* although reference count stays active during descendant walk, */
      /* shouldn't matter since if children still exist, reference     */
      /* counts aren't being monitored anyway                          */

      if (queue->tq_flags & TQF_IS_NOWAIT) {
        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);

        if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) &&
            (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) &&
            !__kmp_taskq_has_any_children(queue)) {

          /* Only remove this if we have not already marked it for deallocation.
             This should prevent multiple threads from trying to free this. */

          if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) {
            if (!(queue->tq_flags & TQF_DEALLOCATED)) {
              queue->tq_flags |= TQF_DEALLOCATED;
              __kmp_release_lock(&queue->tq_queue_lck, global_tid);

              __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);

              /* Can't do any more here since can't be sure where sibling queue
               * is so just exit this level */
              return;
            } else {
              __kmp_release_lock(&queue->tq_queue_lck, global_tid);
            }
          }
          /* otherwise, just fall through and decrement reference count */
        }
      }

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      next = queue->tq_next_child;

      ct = --(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);

      queue = next;
    }

    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
  }
}

/* Starting from indicated queue, proceed downward through tree and remove all
   taskqs assuming all are finished and assuming NO other threads are executing
   at this point. */
static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid,
                                         kmpc_task_queue_t *queue) {
  kmpc_task_queue_t *next_child;

  queue = CCAST(kmpc_task_queue_t *, queue->tq_first_child);

  while (queue != NULL) {
    __kmp_remove_all_child_taskq(tq, global_tid, queue);

    next_child = queue->tq_next_child;
    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE);
    queue = next_child;
  }
}

static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc,
                                          kmp_int32 global_tid,
                                          kmpc_thunk_t *thunk,
                                          int in_parallel) {
  kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
  kmp_int32 tid = __kmp_tid_from_gtid(global_tid);

  KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
  KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

  /* For the taskq task, the curr_thunk pushes and pop pairs are set up as
   * follows:
   *
   * happens exactly once:
   * 1) __kmpc_taskq             : push (if returning thunk only)
   * 4) __kmpc_end_taskq_task    : pop
   *
   * optionally happens *each* time taskq task is dequeued/enqueued:
   * 2) __kmpc_taskq_task        : pop
   * 3) __kmp_execute_task_from_queue  : push
   *
   * execution ordering:  1,(2,3)*,4
   */
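
  /* Example of the resulting per-thread thunk stack (illustrative only):
   * tq_curr_thunk[tid] is the top of a stack linked through th_encl_thunk,
   * so while a task dispatched from a nested queue runs:
   *
   *     tq_curr_thunk[tid] -> nested task thunk
   *                             -> (th_encl_thunk) enclosing taskq thunk
   *                                  -> (th_encl_thunk) NULL
   *
   * __kmp_dump_thunk_stack() prints exactly this chain.
   */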
Jim Cownie5e8470a2013-09-27 10:38:44 +00001222
Jonathan Peyton30419822017-05-12 18:01:32 +00001223 if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
1224 kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
1225 thunk->th.th_shareds =
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001226 CCAST(kmpc_shared_vars_t *, queue->tq_shareds[index].ai_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001227
Jonathan Peyton30419822017-05-12 18:01:32 +00001228 if (__kmp_env_consistency_check) {
1229 __kmp_push_workshare(global_tid,
1230 (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
1231 : ct_task,
1232 queue->tq_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001233 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001234 } else {
1235 if (__kmp_env_consistency_check)
1236 __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc);
1237 }
1238
1239 if (in_parallel) {
1240 thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
1241 tq->tq_curr_thunk[tid] = thunk;
1242
1243 KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
1244 }
1245
1246 KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk,
1247 queue, global_tid));
1248 thunk->th_task(global_tid, thunk);
1249 KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue,
1250 global_tid));
1251
1252 if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
1253 if (__kmp_env_consistency_check)
1254 __kmp_pop_workshare(global_tid,
1255 (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
1256 : ct_task,
1257 queue->tq_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001258
1259 if (in_parallel) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001260 tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
1261 thunk->th_encl_thunk = NULL;
1262 KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001263 }
1264
Jonathan Peyton30419822017-05-12 18:01:32 +00001265 if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
1266 __kmp_taskq_check_ordered(global_tid, thunk);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001267 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001268
1269 __kmp_free_thunk(queue, thunk, in_parallel, global_tid);
1270
1271 KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n",
1272 global_tid, thunk));
1273 KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
1274
1275 if (in_parallel) {
1276 KMP_MB(); /* needed so thunk put on free list before outstanding thunk
1277 count is decremented */
1278
1279 KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
1280
1281 KF_TRACE(
1282 200,
1283 ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
1284 global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue));
1285
1286 queue->tq_th_thunks[tid].ai_data--;
1287
1288 /* KMP_MB(); */ /* is MB really necessary ? */
1289 }
1290
1291 if (queue->tq.tq_parent != NULL && in_parallel) {
1292 int ct;
1293 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1294 ct = --(queue->tq_ref_count);
1295 __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1296 KMP_DEBUG_REF_CTS(
1297 ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
1298 KMP_DEBUG_ASSERT(ct >= 0);
1299 }
1300 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001301}
1302
Jim Cownie5e8470a2013-09-27 10:38:44 +00001303/* starts a taskq; creates and returns a thunk for the taskq_task */
1304/* also, returns pointer to shared vars for this thread in "shareds" arg */
Jonathan Peyton30419822017-05-12 18:01:32 +00001305kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid,
1306 kmpc_task_t taskq_task, size_t sizeof_thunk,
1307 size_t sizeof_shareds, kmp_int32 flags,
1308 kmpc_shared_vars_t **shareds) {
1309 int in_parallel;
1310 kmp_int32 nslots, nthunks, nshareds, nproc;
1311 kmpc_task_queue_t *new_queue, *curr_queue;
1312 kmpc_thunk_t *new_taskq_thunk;
1313 kmp_info_t *th;
1314 kmp_team_t *team;
1315 kmp_taskq_t *tq;
1316 kmp_int32 tid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001317
Jonathan Peyton30419822017-05-12 18:01:32 +00001318 KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001319
Jonathan Peyton30419822017-05-12 18:01:32 +00001320 th = __kmp_threads[global_tid];
1321 team = th->th.th_team;
1322 tq = &team->t.t_taskq;
1323 nproc = team->t.t_nproc;
1324 tid = __kmp_tid_from_gtid(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001325
Jonathan Peyton30419822017-05-12 18:01:32 +00001326 /* find out whether this is a parallel taskq or serialized one. */
1327 in_parallel = in_parallel_context(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001328
Jonathan Peyton30419822017-05-12 18:01:32 +00001329 if (!tq->tq_root) {
1330 if (in_parallel) {
1331 /* Vector ORDERED SECTION to taskq version */
1332 th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001333
Jonathan Peyton30419822017-05-12 18:01:32 +00001334 /* Vector ORDERED SECTION to taskq version */
1335 th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001336 }
1337
Jim Cownie5e8470a2013-09-27 10:38:44 +00001338 if (in_parallel) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001339 // This shouldn't be a barrier region boundary; it would confuse the user.
1340 /* The boundary needs to be at the end of the taskq instead. */
1341 if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
1342 /* We are creating the active root queue, but we are not the master */
1343 /* thread. The master thread (in the code below) created the queue, */
1344 /* tasks have been enqueued, and the master thread has released this */
1345 /* barrier. This worker thread can now proceed and execute tasks. */
1346 /* See also TQF_RELEASE_WORKERS, which is used to handle this case. */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001347 *shareds =
1348 CCAST(kmpc_shared_vars_t *, tq->tq_root->tq_shareds[tid].ai_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00001349 KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));
1350
1351 return NULL;
1352 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001353 }
1354
Jonathan Peyton30419822017-05-12 18:01:32 +00001355 /* master thread only executes this code */
1356 if (tq->tq_curr_thunk_capacity < nproc) {
1357 if (tq->tq_curr_thunk)
1358 __kmp_free(tq->tq_curr_thunk);
1359 else {
1360 /* only need to do this once at outer level, i.e. when tq_curr_thunk is
1361 * still NULL */
1362 __kmp_init_lock(&tq->tq_freelist_lck);
1363 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001364
Jonathan Peyton30419822017-05-12 18:01:32 +00001365 tq->tq_curr_thunk =
1366 (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *));
1367 tq->tq_curr_thunk_capacity = nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001368 }
1369
Jonathan Peyton30419822017-05-12 18:01:32 +00001370 if (in_parallel)
1371 tq->tq_global_flags = TQF_RELEASE_WORKERS;
1372 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001373
Jonathan Peyton30419822017-05-12 18:01:32 +00001374 /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
1375 /* on some heuristics (e.g., depth of queue nesting?). */
1376 nslots = (in_parallel) ? (2 * nproc) : 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001377
Jonathan Peyton30419822017-05-12 18:01:32 +00001378 /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
1379 /* jobs being executed by other threads, and one extra for the taskq slot */
1380 nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1)
1381 : nslots + 2;
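  /* e.g., with nproc == 4 in a parallel region: nslots == 2 * 4 == 8 and
     nthunks == 8 + 4 * __KMP_TASKQ_THUNKS_PER_TH + 1, where the final +1 is
     the extra thunk reserved for the taskq slot; a serialized taskq simply
     uses nslots + 2. */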
Jim Cownie5e8470a2013-09-27 10:38:44 +00001382
Jonathan Peyton30419822017-05-12 18:01:32 +00001383 /* Only the root taskq gets a per-thread array of shareds. */
1384 /* The rest of the taskq's only get one copy of the shared vars. */
1385 nshareds = (!tq->tq_root && in_parallel) ? nproc : 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001386
Jonathan Peyton30419822017-05-12 18:01:32 +00001387 /* create overall queue data structure and its components that require
1388 * allocation */
1389 new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds,
1390 nproc, sizeof_thunk, sizeof_shareds,
1391 &new_taskq_thunk, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001392
Jonathan Peyton30419822017-05-12 18:01:32 +00001393 /* rest of new_queue initializations */
1394 new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001395
Jonathan Peyton30419822017-05-12 18:01:32 +00001396 if (in_parallel) {
1397 new_queue->tq_tasknum_queuing = 0;
1398 new_queue->tq_tasknum_serving = 0;
1399 new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
1400 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001401
Jonathan Peyton30419822017-05-12 18:01:32 +00001402 new_queue->tq_taskq_slot = NULL;
1403 new_queue->tq_nslots = nslots;
1404 new_queue->tq_hiwat = HIGH_WATER_MARK(nslots);
1405 new_queue->tq_nfull = 0;
1406 new_queue->tq_head = 0;
1407 new_queue->tq_tail = 0;
1408 new_queue->tq_loc = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001409
Jonathan Peyton30419822017-05-12 18:01:32 +00001410 if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
1411 /* prepare to serve the first-queued task's ORDERED directive */
1412 new_queue->tq_tasknum_serving = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001413
Jonathan Peyton30419822017-05-12 18:01:32 +00001414 /* Vector ORDERED SECTION to taskq version */
1415 th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001416
Jonathan Peyton30419822017-05-12 18:01:32 +00001417 /* Vector ORDERED SECTION to taskq version */
1418 th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
1419 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001420
Jonathan Peyton30419822017-05-12 18:01:32 +00001421 /* create a new thunk for the taskq_task in the new_queue */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001422 *shareds = CCAST(kmpc_shared_vars_t *, new_queue->tq_shareds[0].ai_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001423
Jonathan Peyton30419822017-05-12 18:01:32 +00001424 new_taskq_thunk->th.th_shareds = *shareds;
1425 new_taskq_thunk->th_task = taskq_task;
1426 new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
1427 new_taskq_thunk->th_status = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001428
Jonathan Peyton30419822017-05-12 18:01:32 +00001429 KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001430
Jonathan Peyton30419822017-05-12 18:01:32 +00001431 // Make sure these inits complete before threads start using this queue
1432 /* KMP_MB(); */ // (necessary?)
1433
1434 /* insert the new task queue into the tree, but only after all fields
1435 * initialized */
1436
1437 if (in_parallel) {
1438 if (!tq->tq_root) {
1439 new_queue->tq.tq_parent = NULL;
1440 new_queue->tq_first_child = NULL;
1441 new_queue->tq_next_child = NULL;
1442 new_queue->tq_prev_child = NULL;
1443 new_queue->tq_ref_count = 1;
1444 tq->tq_root = new_queue;
1445 } else {
1446 curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
1447 new_queue->tq.tq_parent = curr_queue;
1448 new_queue->tq_first_child = NULL;
1449 new_queue->tq_prev_child = NULL;
1450 new_queue->tq_ref_count =
1451 1; /* for the thread that built the queue */
1452
1453 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__,
1454 global_tid, new_queue, new_queue->tq_ref_count));
1455
1456 __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
1457
1458 // Make sure data structures are in consistent state before querying them
1459 // Seems to work without this for digital/alpha, needed for IBM/RS6000
1460 KMP_MB();
1461
1462 new_queue->tq_next_child =
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001463 CCAST(struct kmpc_task_queue_t *, curr_queue->tq_first_child);
Jonathan Peyton30419822017-05-12 18:01:32 +00001464
1465 if (curr_queue->tq_first_child != NULL)
1466 curr_queue->tq_first_child->tq_prev_child = new_queue;
1467
1468 curr_queue->tq_first_child = new_queue;
1469
1470 __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001471 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001472
1473 /* set up thunk stack only after code that determines curr_queue above */
1474 new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
1475 tq->tq_curr_thunk[tid] = new_taskq_thunk;
1476
1477 KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
1478 } else {
1479 new_taskq_thunk->th_encl_thunk = 0;
1480 new_queue->tq.tq_parent = NULL;
1481 new_queue->tq_first_child = NULL;
1482 new_queue->tq_next_child = NULL;
1483 new_queue->tq_prev_child = NULL;
1484 new_queue->tq_ref_count = 1;
1485 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001486
1487#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001488 KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
1489 KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001490
Jonathan Peyton30419822017-05-12 18:01:32 +00001491 if (in_parallel) {
1492 KF_TRACE(25,
1493 ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
1494 } else {
1495 KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue,
1496 global_tid));
1497 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001498
Jonathan Peyton30419822017-05-12 18:01:32 +00001499 KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001500
Jonathan Peyton30419822017-05-12 18:01:32 +00001501 if (in_parallel) {
1502 KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
1503 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001504#endif /* KMP_DEBUG */
1505
Jonathan Peyton30419822017-05-12 18:01:32 +00001506 if (__kmp_env_consistency_check)
1507 __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001508
Jonathan Peyton30419822017-05-12 18:01:32 +00001509 KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001510
Jonathan Peyton30419822017-05-12 18:01:32 +00001511 return new_taskq_thunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001512}
1513
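/* Illustrative sketch (comment only, not compiled): a rough picture of how
   each thread of a parallel region might drive the entry points above and
   below.  Only the __kmpc_* routines and their signatures come from this
   file; my_taskq_task, the thunk/shareds sizes, the zero flags value, the
   status argument, and loc/gtid are placeholders assumed to be supplied by
   the enclosing outlined code, and the exact compiler-generated sequence may
   differ.

     kmpc_shared_vars_t *shareds;
     kmpc_thunk_t *taskq_thunk =
         __kmpc_taskq(loc, gtid, my_taskq_task, sizeof_thunk, sizeof_shareds,
                      0, &shareds);          // flags == 0: assume no clauses

     if (taskq_thunk != NULL) {
       // This thread built the queue; shared variables can be copied into
       // *shareds here, then the task-generating thunk is enqueued
       // (parallel context only).
       __kmpc_taskq_task(loc, gtid, taskq_thunk, 0);   // status: placeholder
     }

     // Workers on the outermost parallel taskq got NULL back above and come
     // straight here; every thread then executes queued tasks until done.
     __kmpc_end_taskq(loc, gtid, taskq_thunk);
*/
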
Jim Cownie5e8470a2013-09-27 10:38:44 +00001514/* ends a taskq; last thread out destroys the queue */
1515
Jonathan Peyton30419822017-05-12 18:01:32 +00001516void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
1517 kmpc_thunk_t *taskq_thunk) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001518#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001519 kmp_int32 i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001520#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001521 kmp_taskq_t *tq;
1522 int in_parallel;
1523 kmp_info_t *th;
1524 kmp_int32 is_outermost;
1525 kmpc_task_queue_t *queue;
1526 kmpc_thunk_t *thunk;
1527 int nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001528
Jonathan Peyton30419822017-05-12 18:01:32 +00001529 KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001530
Jonathan Peyton30419822017-05-12 18:01:32 +00001531 tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
1532 nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001533
Jonathan Peyton30419822017-05-12 18:01:32 +00001534 /* For the outermost taskq only, all but one thread will have taskq_thunk ==
1535 * NULL */
1536 queue = (taskq_thunk == NULL) ? tq->tq_root
1537 : taskq_thunk->th.th_shareds->sv_queue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001538
Jonathan Peyton30419822017-05-12 18:01:32 +00001539 KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
1540 is_outermost = (queue == tq->tq_root);
1541 in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001542
Jonathan Peyton30419822017-05-12 18:01:32 +00001543 if (in_parallel) {
1544 kmp_uint32 spins;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001545
Jonathan Peyton30419822017-05-12 18:01:32 +00001546 /* this is just a safeguard to release the waiting threads if */
1547 /* the outermost taskq never queues a task */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001548
Jonathan Peyton30419822017-05-12 18:01:32 +00001549 if (is_outermost && (KMP_MASTER_GTID(global_tid))) {
1550 if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
1551 /* no lock needed, workers are still in spin mode */
1552 tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001553
Jonathan Peyton30419822017-05-12 18:01:32 +00001554 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1555 }
1556 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001557
Jonathan Peyton30419822017-05-12 18:01:32 +00001558 /* keep dequeueing work until all tasks are queued and dequeued */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001559
Jonathan Peyton30419822017-05-12 18:01:32 +00001560 do {
1561 /* wait until something is available to dequeue */
1562 KMP_INIT_YIELD(spins);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001563
Jonathan Peyton30419822017-05-12 18:01:32 +00001564 while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
1565 (!__kmp_taskq_has_any_children(queue)) &&
1566 (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
1567 KMP_YIELD_WHEN(TRUE, spins);
1568 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001569
Jonathan Peyton30419822017-05-12 18:01:32 +00001570 /* check to see if we can execute tasks in the queue */
1571 while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) &&
1572 (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) {
1573 KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk,
1574 queue, global_tid));
1575 __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
1576 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001577
Jonathan Peyton30419822017-05-12 18:01:32 +00001578 /* see if work can be found in a descendant queue */
1579 if ((__kmp_taskq_has_any_children(queue)) &&
1580 (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
1581 NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001582
Jonathan Peyton30419822017-05-12 18:01:32 +00001583 KF_TRACE(50,
1584 ("Stole thunk: %p in descendant queue: %p while waiting in "
1585 "queue: %p (%d)\n",
1586 thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001587
Jonathan Peyton30419822017-05-12 18:01:32 +00001588 __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
1589 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001590
Jonathan Peyton30419822017-05-12 18:01:32 +00001591 } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) ||
1592 (queue->tq_nfull != 0));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001593
Jonathan Peyton30419822017-05-12 18:01:32 +00001594 KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue,
1595 global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001596
Jonathan Peyton30419822017-05-12 18:01:32 +00001597 /* while not all tasks are finished, keep stealing work from
1598 descendant queues and executing it */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001599
Jonathan Peyton30419822017-05-12 18:01:32 +00001600 while ((!__kmp_taskq_tasks_finished(queue)) &&
1601 (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
1602 NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001603
Jonathan Peyton30419822017-05-12 18:01:32 +00001604 KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in "
1605 "queue: %p (%d)\n",
Jim Cownie5e8470a2013-09-27 10:38:44 +00001606 thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
1607
Jonathan Peyton30419822017-05-12 18:01:32 +00001608 __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001609 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001610
Jonathan Peyton30419822017-05-12 18:01:32 +00001611 KF_TRACE(50, ("No work found in descendant queues or all work finished in "
1612 "queue: %p (%d)\n",
1613 queue, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001614
Jonathan Peyton30419822017-05-12 18:01:32 +00001615 if (!is_outermost) {
1616 /* need to return if NOWAIT present and not outermost taskq */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001617
Jonathan Peyton30419822017-05-12 18:01:32 +00001618 if (queue->tq_flags & TQF_IS_NOWAIT) {
1619 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1620 queue->tq_ref_count--;
1621 KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
1622 __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001623
Jonathan Peyton30419822017-05-12 18:01:32 +00001624 KE_TRACE(
1625 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001626
Jonathan Peyton30419822017-05-12 18:01:32 +00001627 return;
1628 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001629
Jonathan Peyton30419822017-05-12 18:01:32 +00001630 __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
1631
1632 /* WAIT until all tasks are finished and no child queues exist before
1633 * proceeding */
1634 KMP_INIT_YIELD(spins);
1635
1636 while (!__kmp_taskq_tasks_finished(queue) ||
1637 __kmp_taskq_has_any_children(queue)) {
1638 thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue);
1639
1640 if (thunk != NULL) {
1641 KF_TRACE(50,
1642 ("Stole thunk: %p in ancestor queue: %p while waiting in "
1643 "queue: %p (%d)\n",
1644 thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
1645 __kmp_execute_task_from_queue(tq, loc, global_tid, thunk,
1646 in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001647 }
1648
Jonathan Peyton30419822017-05-12 18:01:32 +00001649 KMP_YIELD_WHEN(thunk == NULL, spins);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001650
Jonathan Peyton30419822017-05-12 18:01:32 +00001651 __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
1652 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001653
Jonathan Peyton30419822017-05-12 18:01:32 +00001654 __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
1655 if (!(queue->tq_flags & TQF_DEALLOCATED)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001656 queue->tq_flags |= TQF_DEALLOCATED;
Jonathan Peyton30419822017-05-12 18:01:32 +00001657 }
1658 __kmp_release_lock(&queue->tq_queue_lck, global_tid);
1659
1660 /* only the allocating thread can deallocate the queue */
1661 if (taskq_thunk != NULL) {
1662 __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
1663 }
1664
1665 KE_TRACE(
1666 10,
1667 ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n",
1668 global_tid));
1669
1670 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001671 }
1672
Jonathan Peyton30419822017-05-12 18:01:32 +00001673 // Outermost Queue: steal work from descendants until all tasks are finished
1674
1675 KMP_INIT_YIELD(spins);
1676
1677 while (!__kmp_taskq_tasks_finished(queue)) {
1678 thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
1679
1680 if (thunk != NULL) {
1681 KF_TRACE(50,
1682 ("Stole thunk: %p in descendant queue: %p while waiting in "
1683 "queue: %p (%d)\n",
1684 thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
1685
1686 __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
1687 }
1688
1689 KMP_YIELD_WHEN(thunk == NULL, spins);
1690 }
1691
1692 /* Need this barrier to prevent destruction of the queue before all threads
1693 * have executed the above code */
1694 /* This may need to be done earlier when NOWAIT is implemented for the
1695 * outermost level */
1696
1697 if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
1698 /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */
1699 /* for right now, everybody waits, and the master thread destroys the */
1700 /* remaining queues. */
1701
1702 __kmp_remove_all_child_taskq(tq, global_tid, queue);
1703
1704 /* Now destroy the root queue */
1705 KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n",
1706 global_tid, queue));
1707 KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
1708
1709#ifdef KMP_DEBUG
1710 /* the root queue entry */
1711 KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) &&
1712 (queue->tq_next_child == NULL));
1713
1714 /* children must all be gone by now because of barrier above */
1715 KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);
1716
1717 for (i = 0; i < nproc; i++) {
1718 KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
1719 }
1720
1721 for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
1722 i++, thunk = thunk->th.th_next_free)
1723 ;
1724
1725 KMP_DEBUG_ASSERT(i ==
1726 queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
1727
1728 for (i = 0; i < nproc; i++) {
1729 KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]);
1730 }
1731#endif
1732 /* unlink the root queue entry */
1733 tq->tq_root = NULL;
1734
1735 /* release storage for root queue entry */
1736 KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue,
1737 global_tid));
1738
1739 queue->tq_flags |= TQF_DEALLOCATED;
1740 __kmp_free_taskq(tq, queue, in_parallel, global_tid);
1741
1742 KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
1743
1744 /* release the workers now that the data structures are up to date */
1745 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1746 }
1747
1748 th = __kmp_threads[global_tid];
1749
1750 /* Reset ORDERED SECTION to parallel version */
1751 th->th.th_dispatch->th_deo_fcn = 0;
1752
1753 /* Reset ORDERED SECTION to parallel version */
1754 th->th.th_dispatch->th_dxo_fcn = 0;
1755 } else {
1756 /* in serial execution context, dequeue the last task */
1757 /* and execute it, if there were any tasks encountered */
1758
1759 if (queue->tq_nfull > 0) {
1760 KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
1761
1762 thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
1763
1764 if (queue->tq_flags & TQF_IS_LAST_TASK) {
1765 /* TQF_IS_LASTPRIVATE is set and only one task is left in the queue; */
1766 /* __kmpc_end_taskq_task() has been run, so this is the last task. Run */
1767 /* it with TQF_IS_LAST_TASK so the instrumentation does the copy-out. */
1768
1769 /* no need for test_then_or call since already locked */
1770 thunk->th_flags |= TQF_IS_LAST_TASK;
1771 }
1772
1773 KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
1774 thunk, queue));
1775
1776 __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
1777 }
1778
1779 // destroy the unattached serial queue now that there is no more work to do
1780 KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n",
1781 queue, global_tid));
1782 KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
1783
1784#ifdef KMP_DEBUG
1785 i = 0;
1786 for (thunk = queue->tq_free_thunks; thunk != NULL;
1787 thunk = thunk->th.th_next_free)
1788 ++i;
1789 KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1);
1790#endif
1791 /* release storage for unattached serial queue */
1792 KF_TRACE(50,
1793 ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
1794
1795 queue->tq_flags |= TQF_DEALLOCATED;
1796 __kmp_free_taskq(tq, queue, in_parallel, global_tid);
1797 }
1798
1799 KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001800}
1801
1802/* Enqueues a task for thunk previously created by __kmpc_task_buffer. */
1803/* Returns nonzero if the enqueue just filled up the queue */
1804
Jonathan Peyton30419822017-05-12 18:01:32 +00001805kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) {
1806 kmp_int32 ret;
1807 kmpc_task_queue_t *queue;
1808 int in_parallel;
1809 kmp_taskq_t *tq;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001810
Jonathan Peyton30419822017-05-12 18:01:32 +00001811 KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812
Jonathan Peyton30419822017-05-12 18:01:32 +00001813 KMP_DEBUG_ASSERT(!(thunk->th_flags &
1814 TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001815
Jonathan Peyton30419822017-05-12 18:01:32 +00001816 tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
1817 queue = thunk->th.th_shareds->sv_queue;
1818 in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001819
Jonathan Peyton30419822017-05-12 18:01:32 +00001820 if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
1821 thunk->th_tasknum = ++queue->tq_tasknum_queuing;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001822
Jonathan Peyton30419822017-05-12 18:01:32 +00001823 /* For serial execution, dequeue the preceding task and execute it, if one
1824 * exists */
1825 /* This cannot be the last task. That one is handled in __kmpc_end_taskq */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001826
Jonathan Peyton30419822017-05-12 18:01:32 +00001827 if (!in_parallel && queue->tq_nfull > 0) {
1828 kmpc_thunk_t *prev_thunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001829
Jonathan Peyton30419822017-05-12 18:01:32 +00001830 KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001831
Jonathan Peyton30419822017-05-12 18:01:32 +00001832 prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001833
Jonathan Peyton30419822017-05-12 18:01:32 +00001834 KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
1835 prev_thunk, queue));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001836
Jonathan Peyton30419822017-05-12 18:01:32 +00001837 __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel);
1838 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001839
Jonathan Peyton30419822017-05-12 18:01:32 +00001840 /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private
1841 variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the
1842 task queue is not full and allocates a thunk (which is then passed to
1843 __kmpc_task()). So, the enqueue below should never fail due to a full
1844 queue. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001845
Jonathan Peyton30419822017-05-12 18:01:32 +00001846 KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
1847 KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001848
Jonathan Peyton30419822017-05-12 18:01:32 +00001849 ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001850
Jonathan Peyton30419822017-05-12 18:01:32 +00001851 KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
1852 KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001853
Jonathan Peyton30419822017-05-12 18:01:32 +00001854 KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001855
Jonathan Peyton30419822017-05-12 18:01:32 +00001856 return ret;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001857}
1858
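/* Illustrative sketch (comment only, not compiled): the instrumentation
   sequence documented above -- __kmpc_task_buffer(), initialize private
   variables, __kmpc_task() -- as it might appear in a task-generating taskq
   task routine.  my_work_routine, more_iterations(), and loc are hypothetical
   placeholders; the callback shape mirrors how th_task is invoked in
   __kmp_execute_task_from_queue, and only the __kmpc_* entry points come
   from this file.

     static void my_taskq_task(kmp_int32 gtid, kmpc_thunk_t *taskq_thunk) {
       while (more_iterations()) {
         kmpc_thunk_t *t =
             __kmpc_task_buffer(loc, gtid, taskq_thunk, my_work_routine);
         // ... initialize the new task's private variables inside *t ...
         (void)__kmpc_task(loc, gtid, t);   // nonzero: queue just filled up
       }
       // done generating tasks for this taskq
       __kmpc_end_taskq_task(loc, gtid, taskq_thunk);
     }
*/
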
1859/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */
1860/* this should never be called unless in a parallel context */
1861
Jonathan Peyton30419822017-05-12 18:01:32 +00001862void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk,
1863 kmp_int32 status) {
1864 kmpc_task_queue_t *queue;
1865 kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
1866 int tid = __kmp_tid_from_gtid(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001867
Jonathan Peyton30419822017-05-12 18:01:32 +00001868 KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid));
1869 KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
1870 KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001871
Jonathan Peyton30419822017-05-12 18:01:32 +00001872 queue = thunk->th.th_shareds->sv_queue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001873
Jonathan Peyton30419822017-05-12 18:01:32 +00001874 if (__kmp_env_consistency_check)
1875 __kmp_pop_workshare(global_tid, ct_taskq, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001876
Jonathan Peyton30419822017-05-12 18:01:32 +00001877 /* thunk->th_task is the taskq_task */
1878 KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001879
Jonathan Peyton30419822017-05-12 18:01:32 +00001880 /* not supposed to call __kmpc_taskq_task if it's already enqueued */
1881 KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001882
Jonathan Peyton30419822017-05-12 18:01:32 +00001883 /* dequeue taskq thunk from curr_thunk stack */
1884 tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
1885 thunk->th_encl_thunk = NULL;
1886
1887 KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
1888
1889 thunk->th_status = status;
1890
1891 // Flush thunk->th_status before taskq_task enqueued to avoid race condition
1892 KMP_MB();
1893
1894 /* enqueue taskq_task in thunk into special slot in queue */
1895 /* GEH - probably don't need to lock the taskq slot since only one */
1896 /* thread enqueues and a lock is already set at the dequeue point */
1897
1898 queue->tq_taskq_slot = thunk;
1899
1900 KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid));
1901}
1902
1903/* ends a taskq_task; done generating tasks */
1904
1905void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
1906 kmpc_thunk_t *thunk) {
1907 kmp_taskq_t *tq;
1908 kmpc_task_queue_t *queue;
1909 int in_parallel;
1910 int tid;
1911
1912 KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
1913
1914 tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
1915 queue = thunk->th.th_shareds->sv_queue;
1916 in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
1917 tid = __kmp_tid_from_gtid(global_tid);
1918
1919 if (__kmp_env_consistency_check)
1920 __kmp_pop_workshare(global_tid, ct_taskq, loc);
1921
1922 if (in_parallel) {
1923#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001924 KMP_TEST_THEN_OR32(CCAST(kmp_int32 *, &queue->tq_flags),
1925 (kmp_int32)TQF_ALL_TASKS_QUEUED);
Jonathan Peyton30419822017-05-12 18:01:32 +00001926#else
1927 {
1928 __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
1929
1930 // Make sure data structures are in consistent state before querying them
1931 // Seems to work without this for digital/alpha, needed for IBM/RS6000
1932 KMP_MB();
1933
1934 queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
1935 __kmp_release_lock(&queue->tq_queue_lck, global_tid);
1936 }
1937#endif
1938 }
1939
1940 if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
1941 /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in
1942 the queue if TQF_IS_LASTPRIVATE so we can positively identify that last
1943 task and run it with its TQF_IS_LAST_TASK bit turned on in th_flags.
1944 When __kmpc_end_taskq_task() is called we are done generating all the
1945 tasks, so we know the last one in the queue is the lastprivate task.
1946 Mark the queue as having gotten to this state via tq_flags &
1947 TQF_IS_LAST_TASK; when that task actually executes mark it via th_flags &
1948 TQF_IS_LAST_TASK (this th_flags bit signals the instrumented code to do
1949 copy-outs after execution). */
1950 if (!in_parallel) {
1951 /* No synchronization needed for serial context */
1952 queue->tq_flags |= TQF_IS_LAST_TASK;
1953 } else {
1954#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001955 KMP_TEST_THEN_OR32(CCAST(kmp_int32 *, &queue->tq_flags),
1956 (kmp_int32)TQF_IS_LAST_TASK);
Jonathan Peyton30419822017-05-12 18:01:32 +00001957#else
1958 {
1959 __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
1960
1961 // Make sure data structures are in consistent state before querying them
1962 // Seems to work without this for digital/alpha, needed for IBM/RS6000
1963 KMP_MB();
1964
1965 queue->tq_flags |= TQF_IS_LAST_TASK;
1966 __kmp_release_lock(&queue->tq_queue_lck, global_tid);
1967 }
1968#endif
1969 /* to prevent race condition where last task is dequeued but */
1970 /* flag isn't visible yet (not sure about this) */
1971 KMP_MB();
1972 }
1973 }
1974
1975 /* dequeue taskq thunk from curr_thunk stack */
1976 if (in_parallel) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001977 tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
1978 thunk->th_encl_thunk = NULL;
1979
Jonathan Peyton30419822017-05-12 18:01:32 +00001980 KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
1981 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982
Jonathan Peyton30419822017-05-12 18:01:32 +00001983 KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001984}
1985
1986/* returns thunk for a regular task based on taskq_thunk */
1987/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */
1988
Jonathan Peyton30419822017-05-12 18:01:32 +00001989kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid,
1990 kmpc_thunk_t *taskq_thunk, kmpc_task_t task) {
1991 kmp_taskq_t *tq;
1992 kmpc_task_queue_t *queue;
1993 kmpc_thunk_t *new_thunk;
1994 int in_parallel;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995
Jonathan Peyton30419822017-05-12 18:01:32 +00001996 KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997
Jonathan Peyton30419822017-05-12 18:01:32 +00001998 KMP_DEBUG_ASSERT(
1999 taskq_thunk->th_flags &
2000 TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001
Jonathan Peyton30419822017-05-12 18:01:32 +00002002 tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
2003 queue = taskq_thunk->th.th_shareds->sv_queue;
2004 in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005
Jonathan Peyton30419822017-05-12 18:01:32 +00002006 /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private
2007 variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the
2008 task queue is not full and allocates a thunk (which is then passed to
2009 __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be
2010 the next to be enqueued in __kmpc_task(). */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011
Jonathan Peyton30419822017-05-12 18:01:32 +00002012 new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid);
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00002013 new_thunk->th.th_shareds =
2014 CCAST(kmpc_shared_vars_t *, queue->tq_shareds[0].ai_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00002015 new_thunk->th_encl_thunk = NULL;
2016 new_thunk->th_task = task;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002017
Jonathan Peyton30419822017-05-12 18:01:32 +00002018 /* GEH - shouldn't need to lock the read of tq_flags here */
2019 new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020
Jonathan Peyton30419822017-05-12 18:01:32 +00002021 new_thunk->th_status = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022
Jonathan Peyton30419822017-05-12 18:01:32 +00002023 KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002024
Jonathan Peyton30419822017-05-12 18:01:32 +00002025 KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
2026 KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027
Jonathan Peyton30419822017-05-12 18:01:32 +00002028 KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029
Jonathan Peyton30419822017-05-12 18:01:32 +00002030 return new_thunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002031}
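
/* Illustrative sketch (comment only, not compiled): the regular task routine
   handed to __kmpc_task_buffer() is eventually invoked by the runtime as
   thunk->th_task(gtid, thunk) (see __kmp_execute_task_from_queue).  The name
   my_work_routine and the void return type are assumptions for illustration;
   any user-level data layout beyond kmpc_shared_vars_t is compiler-defined
   and not shown here.

     static void my_work_routine(kmp_int32 gtid, kmpc_thunk_t *thunk) {
       kmpc_shared_vars_t *sv = thunk->th.th_shareds;   // taskq shared vars
       // sv->sv_queue identifies the owning queue; the thunk also carries
       // this task's private data, initialized between __kmpc_task_buffer()
       // and __kmpc_task().
       // ... perform one chunk of work ...
     }
*/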