/*
 * kmp_taskq.cpp -- TASKQ support for OpenMP.
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"

#define MAX_MESSAGE 512

/* Taskq routines and global variables */

#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);

#define THREAD_ALLOC_FOR_TASKQ

static int in_parallel_context(kmp_team_t *team) {
  return !team->t.t_serialized;
}

static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
#if KMP_USE_DYNAMIC_LOCK
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0);
#else
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL);
#endif

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* GEH - need check here under stats to make sure */
    /* inside task (curr_thunk[*tid_ref] != NULL) */

    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;

    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
    KMP_MB();
  }
}

static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref);

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* GEH - need check here under stats to make sure */
    /* inside task (curr_thunk[tid] != NULL) */

    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    KMP_MB(); /* Flush all pending memory write invalidates. */

    tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving =
        my_token + 1;

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
}

static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) {
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;

  /* assume we are always called from an active parallel context */

  KMP_MB(); /* Flush all pending memory write invalidates. */

  my_token = thunk->th_tasknum;

  taskq = thunk->th.th_shareds->sv_queue;

  if (taskq->tq_tasknum_serving <= my_token) {
    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
    KMP_MB();
    taskq->tq_tasknum_serving = my_token + 1;
    KMP_MB();
  }
}
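
/* Sketch of the ordered handshake implemented by the three routines above
   (illustrative only; the ticket numbers are made up):

     thunk A holds th_tasknum == 3:
       __kmp_taskq_eo()  spins until taskq->tq_tasknum_serving == 3
       ...ordered section runs...
       __kmp_taskq_xo()  stores tq_tasknum_serving = 4
     thunk B holds th_tasknum == 4: its wait in __kmp_taskq_eo() now succeeds

   __kmp_taskq_check_ordered() does the same wait-then-advance in one step for
   an ordered task as a whole: it waits for tq_tasknum_serving to reach (or
   pass) its own ticket and then bumps it past that ticket. */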

#ifdef KMP_DEBUG

static void __kmp_dump_TQF(kmp_int32 flags) {
  if (flags & TQF_IS_ORDERED)
    __kmp_printf("ORDERED ");
  if (flags & TQF_IS_LASTPRIVATE)
    __kmp_printf("LAST_PRIV ");
  if (flags & TQF_IS_NOWAIT)
    __kmp_printf("NOWAIT ");
  if (flags & TQF_HEURISTICS)
    __kmp_printf("HEURIST ");
  if (flags & TQF_INTERFACE_RESERVED1)
    __kmp_printf("RESERV1 ");
  if (flags & TQF_INTERFACE_RESERVED2)
    __kmp_printf("RESERV2 ");
  if (flags & TQF_INTERFACE_RESERVED3)
    __kmp_printf("RESERV3 ");
  if (flags & TQF_INTERFACE_RESERVED4)
    __kmp_printf("RESERV4 ");
  if (flags & TQF_IS_LAST_TASK)
    __kmp_printf("LAST_TASK ");
  if (flags & TQF_TASKQ_TASK)
    __kmp_printf("TASKQ_TASK ");
  if (flags & TQF_RELEASE_WORKERS)
    __kmp_printf("RELEASE ");
  if (flags & TQF_ALL_TASKS_QUEUED)
    __kmp_printf("ALL_QUEUED ");
  if (flags & TQF_PARALLEL_CONTEXT)
    __kmp_printf("PARALLEL ");
  if (flags & TQF_DEALLOCATED)
    __kmp_printf("DEALLOC ");
  if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS)))
    __kmp_printf("(NONE)");
}

static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk,
                             kmp_int32 global_tid) {
  int i;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;

  __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);

  if (thunk != NULL) {
    for (i = 0; i < nproc; i++) {
      if (tq->tq_curr_thunk[i] == thunk) {
        __kmp_printf("[%i] ", i);
      }
    }
    __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
    __kmp_printf("th_task=%p, ", thunk->th_task);
    __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
    __kmp_printf("th_status=%d, ", thunk->th_status);
    __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
    __kmp_printf("th_flags=");
    __kmp_dump_TQF(thunk->th_flags);
  }

  __kmp_printf("\n");
}

static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) {
  kmpc_thunk_t *th;

  __kmp_printf("    Thunk stack for T#%d: ", thread_num);

  for (th = thunk; th != NULL; th = th->th_encl_thunk)
    __kmp_printf("%p ", th);

  __kmp_printf("\n");
}

static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue,
                                  kmp_int32 global_tid) {
  int qs, count, i;
  kmpc_thunk_t *thunk;
  kmpc_task_queue_t *taskq;

  __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

  if (queue != NULL) {
    int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

    if (__kmp_env_consistency_check) {
      __kmp_printf("    tq_loc             : ");
    }
    if (in_parallel) {

      // if (queue->tq.tq_parent != 0)
      //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

      //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);

      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
      __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
      __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
      __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
      __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);

      //__kmp_release_lock(& queue->tq_link_lck, global_tid);

      // if (queue->tq.tq_parent != 0)
      //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

      //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
      //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();
    }

    __kmp_printf("    tq_shareds         : ");
    for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
      __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
    __kmp_printf("\n");

    if (in_parallel) {
      __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
      __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
    }

    __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
    __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
    __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);

    __kmp_printf("    tq_free_thunks     : ");
    for (thunk = queue->tq_free_thunks; thunk != NULL;
         thunk = thunk->th.th_next_free)
      __kmp_printf("%p ", thunk);
    __kmp_printf("\n");

    __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
    __kmp_printf("    tq_head            : %d\n", queue->tq_head);
    __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
    __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
    __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
    __kmp_printf("    tq_flags           : ");
    __kmp_dump_TQF(queue->tq_flags);
    __kmp_printf("\n");

    if (in_parallel) {
      __kmp_printf("    tq_th_thunks       : ");
      for (i = 0; i < queue->tq_nproc; i++) {
        __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
      }
      __kmp_printf("\n");
    }

    __kmp_printf("\n");
    __kmp_printf("    Queue slots:\n");

    qs = queue->tq_tail;
    for (count = 0; count < queue->tq_nfull; ++count) {
      __kmp_printf("(%d)", qs);
      __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid);
      qs = (qs + 1) % queue->tq_nslots;
    }

    __kmp_printf("\n");

    if (in_parallel) {
      if (queue->tq_taskq_slot != NULL) {
        __kmp_printf("    TaskQ slot:\n");
        __kmp_dump_thunk(tq, (kmpc_thunk_t *)queue->tq_taskq_slot, global_tid);
        __kmp_printf("\n");
      }
      //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
      //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
    }
  }

  __kmp_printf("    Taskq freelist: ");

  //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free)
    __kmp_printf("%p ", taskq);

  //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );

  __kmp_printf("\n\n");
}

static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq,
                                           kmpc_task_queue_t *curr_queue,
                                           kmp_int32 level,
                                           kmp_int32 global_tid) {
  int i, count, qs;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue == NULL)
    return;

  __kmp_printf("    ");

  for (i = 0; i < level; i++)
    __kmp_printf("  ");

  __kmp_printf("%p", curr_queue);

  for (i = 0; i < nproc; i++) {
    if (tq->tq_curr_thunk[i] &&
        tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) {
      __kmp_printf(" [%i]", i);
    }
  }

  __kmp_printf(":");

  //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  qs = curr_queue->tq_tail;

  for (count = 0; count < curr_queue->tq_nfull; ++count) {
    __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
    qs = (qs + 1) % curr_queue->tq_nslots;
  }

  //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);

  __kmp_printf("\n");

  if (curr_queue->tq_first_child) {
    //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();

    if (curr_queue->tq_first_child) {
      for (queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
           queue != NULL; queue = queue->tq_next_child) {
        __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid);
      }
    }

    //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
  }
}

static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq,
                                       kmpc_task_queue_t *tqroot,
                                       kmp_int32 global_tid) {
  __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

  __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid);

  __kmp_printf("\n");
}
#endif

/* New taskq storage routines that try to minimize overhead of mallocs but
   still provide cache line alignment. */
static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) {
  void *addr, *orig_addr;
  size_t bytes;

  KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size,
               global_tid));

  bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
  orig_addr =
      (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes);
#else
  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes));
  orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes);
#endif /* THREAD_ALLOC_FOR_TASKQ */

  if (orig_addr == 0)
    KMP_FATAL(OutOfHeapMemory);

  addr = orig_addr;

  if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) {
    KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n"));
    addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1));
  }

  (*(void **)addr) = orig_addr;

  KB_TRACE(10,
           ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, "
            "gtid: %d\n",
            orig_addr, ((void **)addr) + 1,
            ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid));

  return (((void **)addr) + 1);
}
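
/* Illustrative picture of the blocks handed out by __kmp_taskq_allocate()
   (assuming a 64-bit build with CACHE_LINE == 64 and a raw allocation at
   address 0x1008; both values are just examples):

     orig_addr = 0x1008      raw pointer from the underlying allocator
     addr      = 0x1040      orig_addr rounded up to the next cache line
     *addr     = orig_addr   raw pointer stashed just below the user block
     return      0x1048      ((void **)addr) + 1, handed to the caller

   __kmpc_taskq_free() (below) reads the stashed word immediately before the
   pointer it is given to recover orig_addr before calling the matching free
   routine. */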

static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) {
  KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid));

  KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n",
                (*(((void **)p) - 1)), global_tid));

#ifdef THREAD_ALLOC_FOR_TASKQ
  __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1));
#else
  KMP_INTERNAL_FREE(*(((void **)p) - 1));
#endif /* THREAD_ALLOC_FOR_TASKQ */
}

/* Keep freed kmpc_task_queue_t on an internal freelist and recycle since
   they're of constant size. */

static kmpc_task_queue_t *
__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots,
                  kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc,
                  size_t sizeof_thunk, size_t sizeof_shareds,
                  kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) {
  kmp_int32 i;
  size_t bytes;
  kmpc_task_queue_t *new_queue;
  kmpc_aligned_shared_vars_t *shared_var_array;
  char *shared_var_storage;
  char *pt; /* for doing byte-adjusted address computations */

  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  if (tq->tq_freelist) {
    new_queue = tq->tq_freelist;
    tq->tq_freelist = tq->tq_freelist->tq.tq_next_free;

    KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

    new_queue->tq_flags = 0;

    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
  } else {
    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);

    new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate(
        sizeof(kmpc_task_queue_t), global_tid);
    new_queue->tq_flags = 0;
  }

  /* space in the task queue for queue slots (allocate as one big chunk */
  /* of storage including new_taskq_task space) */

  sizeof_thunk +=
      (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */
  pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid);
  new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
  *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

  /* chain the allocated thunks into a freelist for this queue */

  new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

  for (i = 0; i < (nthunks - 2); i++) {
    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free =
        (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk);
#ifdef KMP_DEBUG
    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED;
#endif
  }

  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL;
#ifdef KMP_DEBUG
  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags =
      TQF_DEALLOCATED;
#endif

  /* initialize the locks */

  if (in_parallel) {
    __kmp_init_lock(&new_queue->tq_link_lck);
    __kmp_init_lock(&new_queue->tq_free_thunks_lck);
    __kmp_init_lock(&new_queue->tq_queue_lck);
  }

  /* now allocate the slots */

  bytes = nslots * sizeof(kmpc_aligned_queue_slot_t);
  new_queue->tq_queue =
      (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid);

  /* space for array of pointers to shared variable structures */
  sizeof_shareds += sizeof(kmpc_task_queue_t *);
  sizeof_shareds +=
      (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */

  bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t);
  shared_var_array =
      (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid);

  bytes = nshareds * sizeof_shareds;
  shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid);

  for (i = 0; i < nshareds; i++) {
    shared_var_array[i].ai_data =
        (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds);
    shared_var_array[i].ai_data->sv_queue = new_queue;
  }
  new_queue->tq_shareds = shared_var_array;

  /* array for number of outstanding thunks per thread */

  if (in_parallel) {
    bytes = nproc * sizeof(kmpc_aligned_int32_t);
    new_queue->tq_th_thunks =
        (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid);
    new_queue->tq_nproc = nproc;

    for (i = 0; i < nproc; i++)
      new_queue->tq_th_thunks[i].ai_data = 0;
  }

  return new_queue;
}
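
/* Illustrative layout of the thunk space carved out above, where each of the
   nthunks thunks occupies sizeof_thunk bytes (padded to a cache-line
   multiple):

     pt + 0*sizeof_thunk .. pt + (nthunks-2)*sizeof_thunk
         chained through th.th_next_free into the queue's thunk freelist
         (the last of these ends the chain with th_next_free == NULL)
     pt + (nthunks-1)*sizeof_thunk
         not on the freelist; returned through *new_taskq_thunk and used
         for the taskq (dispatcher) task itself */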

static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p,
                             int in_parallel, kmp_int32 global_tid) {
  __kmpc_taskq_free(p->tq_thunk_space, global_tid);
  __kmpc_taskq_free(p->tq_queue, global_tid);

  /* free shared var structure storage */
  __kmpc_taskq_free((void *)p->tq_shareds[0].ai_data, global_tid);

  /* free array of pointers to shared vars storage */
  __kmpc_taskq_free(p->tq_shareds, global_tid);

#ifdef KMP_DEBUG
  p->tq_first_child = NULL;
  p->tq_next_child = NULL;
  p->tq_prev_child = NULL;
  p->tq_ref_count = -10;
  p->tq_shareds = NULL;
  p->tq_tasknum_queuing = 0;
  p->tq_tasknum_serving = 0;
  p->tq_queue = NULL;
  p->tq_thunk_space = NULL;
  p->tq_taskq_slot = NULL;
  p->tq_free_thunks = NULL;
  p->tq_nslots = 0;
  p->tq_head = 0;
  p->tq_tail = 0;
  p->tq_nfull = 0;
  p->tq_hiwat = 0;

  if (in_parallel) {
    int i;

    for (i = 0; i < p->tq_nproc; i++)
      p->tq_th_thunks[i].ai_data = 0;
  }
  if (__kmp_env_consistency_check)
    p->tq_loc = NULL;
  KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED);
  p->tq_flags = TQF_DEALLOCATED;
#endif /* KMP_DEBUG */

  if (in_parallel) {
    __kmpc_taskq_free(p->tq_th_thunks, global_tid);
    __kmp_destroy_lock(&p->tq_link_lck);
    __kmp_destroy_lock(&p->tq_queue_lck);
    __kmp_destroy_lock(&p->tq_free_thunks_lck);
  }
#ifdef KMP_DEBUG
  p->tq_th_thunks = NULL;
#endif /* KMP_DEBUG */

  // Make sure data structures are in consistent state before querying them
  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
  KMP_MB();

  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
  p->tq.tq_next_free = tq->tq_freelist;

  tq->tq_freelist = p;
  __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
}

/* Once a group of thunks has been allocated for use in a particular queue,
   these are managed via a per-queue freelist.
   We force a check that there's always a thunk free if we need one. */

static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue,
                                       int in_parallel, kmp_int32 global_tid) {
  kmpc_thunk_t *fl;

  if (in_parallel) {
    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();
  }

  fl = queue->tq_free_thunks;

  KMP_DEBUG_ASSERT(fl != NULL);

  queue->tq_free_thunks = fl->th.th_next_free;
  fl->th_flags = 0;

  if (in_parallel)
    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);

  return fl;
}

static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p,
                             int in_parallel, kmp_int32 global_tid) {
#ifdef KMP_DEBUG
  p->th_task = 0;
  p->th_encl_thunk = 0;
  p->th_status = 0;
  p->th_tasknum = 0;
/* Also could zero pointers to private vars */
#endif

  if (in_parallel) {
    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();
  }

  p->th.th_next_free = queue->tq_free_thunks;
  queue->tq_free_thunks = p;

#ifdef KMP_DEBUG
  p->th_flags = TQF_DEALLOCATED;
#endif

  if (in_parallel)
    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
}

/* returns nonzero if the queue just became full after the enqueue */
static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid,
                                    kmpc_task_queue_t *queue,
                                    kmpc_thunk_t *thunk, int in_parallel) {
  kmp_int32 ret;

  /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the
   * master is executing then) */
  if (in_parallel) {
    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();
  }

  KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots); // check queue not full

  queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

  if (queue->tq_head >= queue->tq_nslots)
    queue->tq_head = 0;

  (queue->tq_nfull)++;

  KMP_MB(); /* to assure that nfull is seen to increase before
               TQF_ALL_TASKS_QUEUED is set */

  ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

  if (in_parallel) {
    /* don't need to wait until workers are released before unlocking */
    __kmp_release_lock(&queue->tq_queue_lck, global_tid);

    if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
      // If just creating the root queue, the worker threads are waiting at a
      // join barrier until now, when there's something in the queue for them to
      // do; release them now to do work. This should only be done when this is
      // the first task enqueued, so reset the flag here also.
      tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers
                                                      are still in spin mode */
      // avoid releasing barrier twice if taskq_task switches threads
      KMP_MB();

      __kmpc_end_barrier_master(NULL, global_tid);
    }
  }

  return ret;
}
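
/* Illustrative state of the circular buffer maintained above, for a queue
   with tq_nslots == 4 after three enqueues and one dequeue (the slot
   contents T2/T3 are hypothetical):

     index    :    0      1      2      3
     tq_queue : [ ---- ][  T2  ][  T3  ][ ---- ]
                           ^tq_tail        ^tq_head      tq_nfull == 2

   __kmp_enqueue_task() writes at tq_head and wraps it at tq_nslots;
   __kmp_dequeue_task() (below) reads at tq_tail and wraps the same way, so
   tq_nfull is what records how many live entries sit between the two. */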

static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid,
                                        kmpc_task_queue_t *queue,
                                        int in_parallel) {
  kmpc_thunk_t *pt;
  int tid = __kmp_tid_from_gtid(global_tid);

  KMP_DEBUG_ASSERT(queue->tq_nfull > 0); /* check queue not empty */

  if (queue->tq.tq_parent != NULL && in_parallel) {
    int ct;
    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    ct = ++(queue->tq_ref_count);
    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    KMP_DEBUG_REF_CTS(
        ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
  }

  pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

  if (queue->tq_tail >= queue->tq_nslots)
    queue->tq_tail = 0;

  if (in_parallel) {
    queue->tq_th_thunks[tid].ai_data++;

    KMP_MB(); /* necessary so ai_data increment is propagated to other threads
                 immediately (digital) */

    KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding "
                   "thunks from queue %p\n",
                   global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
  }

  (queue->tq_nfull)--;

#ifdef KMP_DEBUG
  KMP_MB();

  /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is
   * decremented */

  KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

  if (in_parallel) {
    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <=
                     __KMP_TASKQ_THUNKS_PER_TH);
  }
#endif

  return pt;
}
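
/* Note on the tq_ref_count manipulation above: when a thunk is taken from a
   child queue in parallel mode, the queue's reference count is bumped under
   the parent's tq_link_lck so the queue cannot be reclaimed while that thunk
   is still outstanding; the matching decrement is performed in
   __kmp_execute_task_from_queue() after the thunk finishes executing. */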

/* Find the next (non-null) task to dequeue and return it.
 * This is never called unless in_parallel=TRUE
 *
 * Here are the rules for deciding which queue to take the task from:
 * 1.  Walk up the task queue tree from the current queue's parent and look
 *     on the way up (for loop, below).
 * 2.  Do a depth-first search back down the tree from the root and
 *     look (find_task_in_descendant_queue()).
 *
 * Here are the rules for deciding which task to take from a queue
 * (__kmp_find_task_in_queue ()):
 * 1.  Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task
 *     must be staged to make sure we execute the last one with
 *     TQF_IS_LAST_TASK at the end of task queue execution.
 * 2.  If the queue length is below some high water mark and the taskq task
 *     is enqueued, prefer running the taskq task.
 * 3.  Otherwise, take a (normal) task from the queue.
 *
 * If we do all this and return pt == NULL at the bottom of this routine,
 * this means there are no more tasks to execute (except possibly for
 * TQF_IS_LASTPRIVATE).
 */

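/* Condensed view of the per-queue policy as it is coded in
   __kmp_find_task_in_queue() below (checked in this order):

     1. taskq task available and tq_nfull <= tq_hiwat    -> run the taskq task
     2. queue empty, or this thread already holds
        __KMP_TASKQ_THUNKS_PER_TH outstanding thunks     -> return NULL
     3. more than one task queued                        -> dequeue one
     4. exactly one task and not TQF_IS_LASTPRIVATE      -> dequeue it
     5. exactly one task, TQF_IS_LASTPRIVATE, and
        TQF_IS_LAST_TASK already set on the queue        -> dequeue it and tag
                                                            the thunk with
                                                            TQF_IS_LAST_TASK */
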
static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid,
                                              kmpc_task_queue_t *queue) {
  kmpc_thunk_t *pt = NULL;
  int tid = __kmp_tid_from_gtid(global_tid);

  /* To prevent deadlock from tq_queue_lck if queue already deallocated */
  if (!(queue->tq_flags & TQF_DEALLOCATED)) {

    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

    /* Check again to avoid race in __kmpc_end_taskq() */
    if (!(queue->tq_flags & TQF_DEALLOCATED)) {
      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      if ((queue->tq_taskq_slot != NULL) &&
          (queue->tq_nfull <= queue->tq_hiwat)) {
        /* if there's enough room in the queue and the dispatcher */
        /* (taskq task) is available, schedule more tasks */
        pt = (kmpc_thunk_t *)queue->tq_taskq_slot;
        queue->tq_taskq_slot = NULL;
      } else if (queue->tq_nfull == 0 ||
                 queue->tq_th_thunks[tid].ai_data >=
                     __KMP_TASKQ_THUNKS_PER_TH) {
        /* do nothing if no thunks available or this thread can't */
        /* run any because it already is executing too many */
        pt = NULL;
      } else if (queue->tq_nfull > 1) {
        /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */

        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
        // one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (queue->tq_flags & TQF_IS_LAST_TASK) {
        /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */
        /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
        /* instrumentation does copy-out. */
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
        pt->th_flags |=
            TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */
      }
    }

    /* GEH - What happens here if is lastprivate, but not last task? */
    __kmp_release_lock(&queue->tq_queue_lck, global_tid);
  }

  return pt;
}

/* Walk a tree of queues starting at queue's first child and return a non-NULL
   thunk if one can be scheduled. Must only be called when in_parallel=TRUE */

static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue(kmp_int32 global_tid,
                                    kmpc_task_queue_t *curr_queue) {
  kmpc_thunk_t *pt = NULL;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue->tq_first_child != NULL) {
    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
    // Make sure data structures are in consistent state before querying them
    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
    KMP_MB();

    queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
    if (queue == NULL) {
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      return NULL;
    }

    while (queue != NULL) {
      int ct;
      kmpc_task_queue_t *next;

      ct = ++(queue->tq_ref_count);
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

      pt = __kmp_find_task_in_queue(global_tid, queue);

      if (pt != NULL) {
        int ct;

        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
        // Make sure data structures in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      /* although reference count stays active during descendant walk, shouldn't
         matter since if children still exist, reference counts aren't being
         monitored anyway */

      pt = __kmp_find_task_in_descendant_queue(global_tid, queue);

      if (pt != NULL) {
        int ct;

        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
        // Make sure data structures in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(ct >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
      // Make sure data structures in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      next = queue->tq_next_child;

      ct = --(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);

      queue = next;
    }

    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
  }

  return pt;
}

/* Walk up the taskq tree looking for a task to execute. If we get to the root,
   search the tree for a descendant queue task. Must only be called when
   in_parallel=TRUE */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000931static kmpc_thunk_t *
Jonathan Peyton30419822017-05-12 18:01:32 +0000932__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid,
933 kmpc_task_queue_t *curr_queue) {
934 kmpc_task_queue_t *queue;
935 kmpc_thunk_t *pt;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000936
Jonathan Peyton30419822017-05-12 18:01:32 +0000937 pt = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000938
Jonathan Peyton30419822017-05-12 18:01:32 +0000939 if (curr_queue->tq.tq_parent != NULL) {
940 queue = curr_queue->tq.tq_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000941
Jonathan Peyton30419822017-05-12 18:01:32 +0000942 while (queue != NULL) {
943 if (queue->tq.tq_parent != NULL) {
944 int ct;
945 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
946 // Make sure data structures in consistent state before querying them
947 // Seems to work without this for digital/alpha, needed for IBM/RS6000
948 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000949
Jonathan Peyton30419822017-05-12 18:01:32 +0000950 ct = ++(queue->tq_ref_count);
951 __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
952 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__,
953 global_tid, queue, ct));
954 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000955
Jonathan Peyton30419822017-05-12 18:01:32 +0000956 pt = __kmp_find_task_in_queue(global_tid, queue);
957 if (pt != NULL) {
958 if (queue->tq.tq_parent != NULL) {
959 int ct;
960 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
961 // Make sure data structures in consistent state before querying them
962 // Seems to work without this for digital/alpha, needed for IBM/RS6000
963 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000964
Jonathan Peyton30419822017-05-12 18:01:32 +0000965 ct = --(queue->tq_ref_count);
966 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
967 global_tid, queue, ct));
968 KMP_DEBUG_ASSERT(ct >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000969
Jonathan Peyton30419822017-05-12 18:01:32 +0000970 __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000971 }
972
Jonathan Peyton30419822017-05-12 18:01:32 +0000973 return pt;
974 }
975
976 if (queue->tq.tq_parent != NULL) {
977 int ct;
978 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
979 // Make sure data structures in consistent state before querying them
980 // Seems to work without this for digital/alpha, needed for IBM/RS6000
981 KMP_MB();
982
983 ct = --(queue->tq_ref_count);
984 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
985 global_tid, queue, ct));
986 KMP_DEBUG_ASSERT(ct >= 0);
987 }
988 queue = queue->tq.tq_parent;
989
990 if (queue != NULL)
991 __kmp_release_lock(&queue->tq_link_lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000992 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000993 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000994
Jonathan Peyton30419822017-05-12 18:01:32 +0000995 pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000996
Jonathan Peyton30419822017-05-12 18:01:32 +0000997 return pt;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000998}
999
Jonathan Peyton30419822017-05-12 18:01:32 +00001000static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) {
1001 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001002
Jonathan Peyton30419822017-05-12 18:01:32 +00001003 /* KMP_MB(); */ /* is this really necessary? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001004
Jonathan Peyton30419822017-05-12 18:01:32 +00001005 for (i = 0; i < queue->tq_nproc; i++) {
1006 if (queue->tq_th_thunks[i].ai_data != 0)
1007 return FALSE;
1008 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001009
Jonathan Peyton30419822017-05-12 18:01:32 +00001010 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001011}
1012
Jonathan Peyton30419822017-05-12 18:01:32 +00001013static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) {
1014 return (queue->tq_first_child != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001015}
1016
Jonathan Peyton30419822017-05-12 18:01:32 +00001017static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid,
1018 kmpc_task_queue_t *queue,
1019 int in_parallel) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001020#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001021 kmp_int32 i;
1022 kmpc_thunk_t *thunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001023#endif
1024
Jonathan Peyton30419822017-05-12 18:01:32 +00001025 KF_TRACE(50,
1026 ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
1027 KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001028
Jonathan Peyton30419822017-05-12 18:01:32 +00001029 /* sub-queue in a recursion, not the root task queue */
1030 KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001031
Jonathan Peyton30419822017-05-12 18:01:32 +00001032 if (in_parallel) {
1033 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1034 // Make sure data structures are in consistent state before querying them
1035 // Seems to work without this call for digital/alpha, needed for IBM/RS6000
1036 KMP_MB();
1037 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001038
Jonathan Peyton30419822017-05-12 18:01:32 +00001039 KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);
1040
1041 /* unlink queue from its siblings if any at this level */
1042 if (queue->tq_prev_child != NULL)
1043 queue->tq_prev_child->tq_next_child = queue->tq_next_child;
1044 if (queue->tq_next_child != NULL)
1045 queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
1046 if (queue->tq.tq_parent->tq_first_child == queue)
1047 queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
1048
1049 queue->tq_prev_child = NULL;
1050 queue->tq_next_child = NULL;
1051
1052 if (in_parallel) {
1053 KMP_DEBUG_REF_CTS(
1054 ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
1055 __LINE__, global_tid, queue, queue->tq_ref_count));
1056
1057 /* wait until all other threads have stopped accessing this queue */
1058 while (queue->tq_ref_count > 1) {
1059 __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1060
1061 KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
1062 NULL);
1063
1064 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1065 // Make sure data structures are in consistent state before querying them
1066 // Seems to work without this for digital/alpha, needed for IBM/RS6000
1067 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001068 }
1069
Jonathan Peyton30419822017-05-12 18:01:32 +00001070 __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1071 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001072
Jonathan Peyton30419822017-05-12 18:01:32 +00001073 KMP_DEBUG_REF_CTS(
1074 ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001075
1076#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001077 KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
1078 KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001079
Jonathan Peyton30419822017-05-12 18:01:32 +00001080 for (i = 0; i < queue->tq_nproc; i++) {
1081 KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
1082 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001083
Jonathan Peyton30419822017-05-12 18:01:32 +00001084 i = 0;
1085 for (thunk = queue->tq_free_thunks; thunk != NULL;
1086 thunk = thunk->th.th_next_free)
1087 ++i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001088
Jonathan Peyton30419822017-05-12 18:01:32 +00001089 KMP_ASSERT(i ==
1090 queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001091#endif
1092
Jonathan Peyton30419822017-05-12 18:01:32 +00001093 /* release storage for queue entry */
1094 __kmp_free_taskq(tq, queue, TRUE, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001095
Jonathan Peyton30419822017-05-12 18:01:32 +00001096 KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
1097 KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001098}
1099
Jonathan Peyton30419822017-05-12 18:01:32 +00001100/* Starting from indicated queue, proceed downward through tree and remove all
1101 taskqs which are finished, but only go down to taskqs which have the "nowait"
1102 clause present. Assume this is only called when in_parallel=TRUE. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001103
Jonathan Peyton30419822017-05-12 18:01:32 +00001104static void __kmp_find_and_remove_finished_child_taskq(
1105 kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) {
1106 kmpc_task_queue_t *queue = curr_queue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001107
Jonathan Peyton30419822017-05-12 18:01:32 +00001108 if (curr_queue->tq_first_child != NULL) {
1109 __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
1110 // Make sure data structures are in consistent state before querying them
1111 // Seems to work without this call for digital/alpha, needed for IBM/RS6000
1112 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001113
Jonathan Peyton30419822017-05-12 18:01:32 +00001114 queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
1115 if (queue != NULL) {
1116 __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
1117 return;
1118 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001119
Jonathan Peyton30419822017-05-12 18:01:32 +00001120 while (queue != NULL) {
1121 kmpc_task_queue_t *next;
1122 int ct = ++(queue->tq_ref_count);
1123 KMP_DEBUG_REF_CTS(
1124 ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001125
Jonathan Peyton30419822017-05-12 18:01:32 +00001126 /* although reference count stays active during descendant walk, */
1127 /* shouldn't matter since if children still exist, reference */
1128 /* counts aren't being monitored anyway */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001129
Jonathan Peyton30419822017-05-12 18:01:32 +00001130 if (queue->tq_flags & TQF_IS_NOWAIT) {
1131 __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001132
Jonathan Peyton30419822017-05-12 18:01:32 +00001133 if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) &&
1134 (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) &&
1135 !__kmp_taskq_has_any_children(queue)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001136
Jonathan Peyton30419822017-05-12 18:01:32 +00001137 /* Only remove this if we have not already marked it for deallocation.
1138 This should prevent multiple threads from trying to free this. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001139
Jonathan Peyton30419822017-05-12 18:01:32 +00001140 if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) {
1141 if (!(queue->tq_flags & TQF_DEALLOCATED)) {
1142 queue->tq_flags |= TQF_DEALLOCATED;
1143 __kmp_release_lock(&queue->tq_queue_lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001144
Jonathan Peyton30419822017-05-12 18:01:32 +00001145 __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001146
Jonathan Peyton30419822017-05-12 18:01:32 +00001147 /* Can't do any more here since can't be sure where sibling queue
1148 * is so just exit this level */
1149 return;
1150 } else {
1151 __kmp_release_lock(&queue->tq_queue_lck, global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001152 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001153 }
1154 /* otherwise, just fall through and decrement reference count */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001155 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001156 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001157
Jonathan Peyton30419822017-05-12 18:01:32 +00001158 __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
1159 // Make sure data structures are in consistent state before querying them
1160 // Seems to work without this for digital/alpha, needed for IBM/RS6000
1161 KMP_MB();
1162
1163 next = queue->tq_next_child;
1164
1165 ct = --(queue->tq_ref_count);
1166 KMP_DEBUG_REF_CTS(
1167 ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
1168 KMP_DEBUG_ASSERT(ct >= 0);
1169
1170 queue = next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001171 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001172
1173 __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
1174 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001175}
1176
Jonathan Peyton30419822017-05-12 18:01:32 +00001177/* Starting from indicated queue, proceed downward through tree and remove all
1178 taskq's assuming all are finished and assuming NO other threads are executing
1179 at this point. */
1180static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid,
1181 kmpc_task_queue_t *queue) {
1182 kmpc_task_queue_t *next_child;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001183
Jonathan Peyton30419822017-05-12 18:01:32 +00001184 queue = (kmpc_task_queue_t *)queue->tq_first_child;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001185
Jonathan Peyton30419822017-05-12 18:01:32 +00001186 while (queue != NULL) {
1187 __kmp_remove_all_child_taskq(tq, global_tid, queue);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001188
Jonathan Peyton30419822017-05-12 18:01:32 +00001189 next_child = queue->tq_next_child;
1190 queue->tq_flags |= TQF_DEALLOCATED;
1191 __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE);
1192 queue = next_child;
1193 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001194}
1195
Jonathan Peyton30419822017-05-12 18:01:32 +00001196static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc,
1197 kmp_int32 global_tid,
1198 kmpc_thunk_t *thunk,
1199 int in_parallel) {
1200 kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
1201 kmp_int32 tid = __kmp_tid_from_gtid(global_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001202
Jonathan Peyton30419822017-05-12 18:01:32 +00001203 KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
1204 KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
1205 KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
1206 KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001207
Jonathan Peyton30419822017-05-12 18:01:32 +00001208 /* For the taskq task, the curr_thunk pushes and pop pairs are set up as
1209 * follows:
1210 *
1211 * happens exactly once:
1212 * 1) __kmpc_taskq : push (if returning thunk only)
1213 * 4) __kmpc_end_taskq_task : pop
1214 *
1215 * optionally happens *each* time taskq task is dequeued/enqueued:
1216 * 2) __kmpc_taskq_task : pop
1217 * 3) __kmp_execute_task_from_queue : push
1218 *
1219 * execution ordering: 1,(2,3)*,4
1220 */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001221
Jonathan Peyton30419822017-05-12 18:01:32 +00001222 if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
1223 kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
1224 thunk->th.th_shareds =
1225 (kmpc_shared_vars_t *)queue->tq_shareds[index].ai_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001226
Jonathan Peyton30419822017-05-12 18:01:32 +00001227 if (__kmp_env_consistency_check) {
1228 __kmp_push_workshare(global_tid,
1229 (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
1230 : ct_task,
1231 queue->tq_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001232 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001233 } else {
1234 if (__kmp_env_consistency_check)
1235 __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc);
1236 }
1237
1238 if (in_parallel) {
1239 thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
1240 tq->tq_curr_thunk[tid] = thunk;
1241
1242 KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
1243 }
1244
1245 KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk,
1246 queue, global_tid));
1247 thunk->th_task(global_tid, thunk);
1248 KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue,
1249 global_tid));
1250
1251 if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
1252 if (__kmp_env_consistency_check)
1253 __kmp_pop_workshare(global_tid,
1254 (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
1255 : ct_task,
1256 queue->tq_loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001257
1258 if (in_parallel) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001259 tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
1260 thunk->th_encl_thunk = NULL;
1261 KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001262 }
1263
Jonathan Peyton30419822017-05-12 18:01:32 +00001264 if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
1265 __kmp_taskq_check_ordered(global_tid, thunk);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001266 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001267
1268 __kmp_free_thunk(queue, thunk, in_parallel, global_tid);
1269
1270 KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n",
1271 global_tid, thunk));
1272 KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
1273
1274 if (in_parallel) {
1275 KMP_MB(); /* needed so thunk put on free list before outstanding thunk
1276 count is decremented */
1277
1278 KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
1279
1280 KF_TRACE(
1281 200,
1282 ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
1283 global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue));
1284
1285 queue->tq_th_thunks[tid].ai_data--;
1286
1287 /* KMP_MB(); */ /* is MB really necessary ? */
1288 }
1289
1290 if (queue->tq.tq_parent != NULL && in_parallel) {
1291 int ct;
1292 __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1293 ct = --(queue->tq_ref_count);
1294 __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
1295 KMP_DEBUG_REF_CTS(
1296 ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
1297 KMP_DEBUG_ASSERT(ct >= 0);
1298 }
1299 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001300}

/* starts a taskq; creates and returns a thunk for the taskq_task */
/* also, returns pointer to shared vars for this thread in "shareds" arg */
kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid,
                           kmpc_task_t taskq_task, size_t sizeof_thunk,
                           size_t sizeof_shareds, kmp_int32 flags,
                           kmpc_shared_vars_t **shareds) {
  int in_parallel;
  kmp_int32 nslots, nthunks, nshareds, nproc;
  kmpc_task_queue_t *new_queue, *curr_queue;
  kmpc_thunk_t *new_taskq_thunk;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_taskq_t *tq;
  kmp_int32 tid;

  KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid));

  th = __kmp_threads[global_tid];
  team = th->th.th_team;
  tq = &team->t.t_taskq;
  nproc = team->t.t_nproc;
  tid = __kmp_tid_from_gtid(global_tid);

  /* find out whether this is a parallel taskq or a serialized one. */
  in_parallel = in_parallel_context(team);

  if (!tq->tq_root) {
    if (in_parallel) {
      /* Vector ORDERED SECTION to taskq version */
      th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

      /* Vector ORDERED SECTION to taskq version */
      th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
    }

    if (in_parallel) {
      // This shouldn't be a barrier region boundary; it will confuse the user.
      /* Need the boundary to be at the end of the taskq instead. */
      if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
        /* Creating the active root queue, and we are not the master thread. */
        /* The master thread below created the queue and tasks have been */
        /* enqueued, and the master thread released this barrier. This */
        /* worker thread can now proceed and execute tasks. See also the */
        /* TQF_RELEASE_WORKERS which is used to handle this case. */
        *shareds = (kmpc_shared_vars_t *)tq->tq_root->tq_shareds[tid].ai_data;

        KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));

        return NULL;
      }
    }

    /* master thread only executes this code */
    if (tq->tq_curr_thunk_capacity < nproc) {
      if (tq->tq_curr_thunk)
        __kmp_free(tq->tq_curr_thunk);
      else {
        /* only need to do this once at outer level, i.e. when tq_curr_thunk is
         * still NULL */
        __kmp_init_lock(&tq->tq_freelist_lck);
      }

      tq->tq_curr_thunk =
          (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *));
      tq->tq_curr_thunk_capacity = nproc;
    }

    if (in_parallel)
      tq->tq_global_flags = TQF_RELEASE_WORKERS;
  }

  /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
  /* on some heuristics (e.g., depth of queue nesting?). */
  nslots = (in_parallel) ? (2 * nproc) : 1;

  /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
  /* jobs being executed by other threads, and one extra for taskq slot */
  nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1)
                          : nslots + 2;

  /* Only the root taskq gets a per-thread array of shareds. */
  /* The rest of the taskqs only get one copy of the shared vars. */
  nshareds = (!tq->tq_root && in_parallel) ? nproc : 1;

  /* create overall queue data structure and its components that require
   * allocation */
  new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds,
                                nproc, sizeof_thunk, sizeof_shareds,
                                &new_taskq_thunk, global_tid);

  /* rest of new_queue initializations */
  new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;

  if (in_parallel) {
    new_queue->tq_tasknum_queuing = 0;
    new_queue->tq_tasknum_serving = 0;
    new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
  }

  new_queue->tq_taskq_slot = NULL;
  new_queue->tq_nslots = nslots;
  new_queue->tq_hiwat = HIGH_WATER_MARK(nslots);
  new_queue->tq_nfull = 0;
  new_queue->tq_head = 0;
  new_queue->tq_tail = 0;
  new_queue->tq_loc = loc;

  if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
    /* prepare to serve the first-queued task's ORDERED directive */
    new_queue->tq_tasknum_serving = 1;

    /* Vector ORDERED SECTION to taskq version */
    th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

    /* Vector ORDERED SECTION to taskq version */
    th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
  }

  /* create a new thunk for the taskq_task in the new_queue */
  *shareds = (kmpc_shared_vars_t *)new_queue->tq_shareds[0].ai_data;

  new_taskq_thunk->th.th_shareds = *shareds;
  new_taskq_thunk->th_task = taskq_task;
  new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
  new_taskq_thunk->th_status = 0;

  KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK);

  // Make sure these inits complete before threads start using this queue
  /* KMP_MB(); */ // (necessary?)

  /* insert the new task queue into the tree, but only after all fields
   * initialized */

  if (in_parallel) {
    if (!tq->tq_root) {
      new_queue->tq.tq_parent = NULL;
      new_queue->tq_first_child = NULL;
      new_queue->tq_next_child = NULL;
      new_queue->tq_prev_child = NULL;
      new_queue->tq_ref_count = 1;
      tq->tq_root = new_queue;
    } else {
      curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
      new_queue->tq.tq_parent = curr_queue;
      new_queue->tq_first_child = NULL;
      new_queue->tq_prev_child = NULL;
      new_queue->tq_ref_count = 1; /* for the thread that built the queue */

      KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__,
                         global_tid, new_queue, new_queue->tq_ref_count));

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      new_queue->tq_next_child =
          (struct kmpc_task_queue_t *)curr_queue->tq_first_child;

      if (curr_queue->tq_first_child != NULL)
        curr_queue->tq_first_child->tq_prev_child = new_queue;

      curr_queue->tq_first_child = new_queue;

      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
    }

    /* set up thunk stack only after code that determines curr_queue above */
    new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = new_taskq_thunk;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  } else {
    new_taskq_thunk->th_encl_thunk = 0;
    new_queue->tq.tq_parent = NULL;
    new_queue->tq_first_child = NULL;
    new_queue->tq_next_child = NULL;
    new_queue->tq_prev_child = NULL;
    new_queue->tq_ref_count = 1;
  }

#ifdef KMP_DEBUG
  KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
  KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid));

  if (in_parallel) {
    KF_TRACE(25,
             ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
  } else {
    KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue,
                  global_tid));
  }

  KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid));

  if (in_parallel) {
    KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
  }
#endif /* KMP_DEBUG */

  if (__kmp_env_consistency_check)
    __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc);

  KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));

  return new_taskq_thunk;
}
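
/* Rough sketch of how compiler-generated code is expected to drive these
   entry points for a taskq construct, pieced together from the comments in
   this file (the identifiers below are placeholders, not actual generated
   names):

     kmpc_shared_vars_t *shareds;
     kmpc_thunk_t *taskq_thunk =
         __kmpc_taskq(loc, gtid, my_taskq_task, my_sizeof_thunk,
                      my_sizeof_shareds, my_flags, &shareds);
     if (taskq_thunk != NULL) {
       // Only the thread that builds the queue gets a non-NULL thunk; its
       // taskq_task generates work via __kmpc_task_buffer() and __kmpc_task()
       // and finishes generation with __kmpc_end_taskq_task().
     }
     __kmpc_end_taskq(loc, gtid, taskq_thunk); // workers pass NULL here

   The real protocol may differ in detail; this only shows how the entry
   points in this file relate to each other. */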

/* ends a taskq; last thread out destroys the queue */

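/* In a parallel context each thread drains the queue here, stealing from
   descendant queues while it waits. A nested queue then either returns right
   away (TQF_IS_NOWAIT) or waits until its tasks and children are finished
   before being deallocated; for the outermost queue all threads hit a final
   barrier and the master thread destroys the remaining queues and the root.
   In a serialized context the single pending task, if any, is executed and
   the queue is freed. */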
void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
                      kmpc_thunk_t *taskq_thunk) {
#ifdef KMP_DEBUG
  kmp_int32 i;
#endif
  kmp_taskq_t *tq;
  int in_parallel;
  kmp_info_t *th;
  kmp_int32 is_outermost;
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *thunk;
  int nproc;

  KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;

  /* For the outermost taskq only, all but one thread will have taskq_thunk ==
   * NULL */
  queue = (taskq_thunk == NULL) ? tq->tq_root
                                : taskq_thunk->th.th_shareds->sv_queue;

  KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
  is_outermost = (queue == tq->tq_root);
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  if (in_parallel) {
    kmp_uint32 spins;

    /* this is just a safeguard to release the waiting threads if */
    /* the outermost taskq never queues a task */

    if (is_outermost && (KMP_MASTER_GTID(global_tid))) {
      if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
        /* no lock needed, workers are still in spin mode */
        tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

        __kmp_end_split_barrier(bs_plain_barrier, global_tid);
      }
    }

    /* keep dequeueing work until all tasks are queued and dequeued */

    do {
      /* wait until something is available to dequeue */
      KMP_INIT_YIELD(spins);

      while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
             (!__kmp_taskq_has_any_children(queue)) &&
             (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
        KMP_YIELD_WHEN(TRUE, spins);
      }

      /* check to see if we can execute tasks in the queue */
      while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) &&
             (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) {
        KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk,
                      queue, global_tid));
        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

      /* see if work can be found in a descendant queue */
      if ((__kmp_taskq_has_any_children(queue)) &&
          (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
              NULL) {

        KF_TRACE(50,
                 ("Stole thunk: %p in descendant queue: %p while waiting in "
                  "queue: %p (%d)\n",
                  thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

    } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) ||
             (queue->tq_nfull != 0));

    KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue,
                  global_tid));

    /* while not all tasks are finished, keep stealing and executing any
       work found in descendant queues */

    while ((!__kmp_taskq_tasks_finished(queue)) &&
           (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
               NULL) {

      KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in "
                    "queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
    }

    KF_TRACE(50, ("No work found in descendant queues or all work finished in "
                  "queue: %p (%d)\n",
                  queue, global_tid));

    if (!is_outermost) {
      /* need to return if NOWAIT present and not outermost taskq */

      if (queue->tq_flags & TQF_IS_NOWAIT) {
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        queue->tq_ref_count--;
        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

        KE_TRACE(
            10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));

        return;
      }

      __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);

      /* WAIT until all tasks are finished and no child queues exist before
       * proceeding */
      KMP_INIT_YIELD(spins);

      while (!__kmp_taskq_tasks_finished(queue) ||
             __kmp_taskq_has_any_children(queue)) {
        thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue);

        if (thunk != NULL) {
          KF_TRACE(50,
                   ("Stole thunk: %p in ancestor queue: %p while waiting in "
                    "queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
          __kmp_execute_task_from_queue(tq, loc, global_tid, thunk,
                                        in_parallel);
        }

        KMP_YIELD_WHEN(thunk == NULL, spins);

        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
      }

      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
      if (!(queue->tq_flags & TQF_DEALLOCATED)) {
        queue->tq_flags |= TQF_DEALLOCATED;
      }
      __kmp_release_lock(&queue->tq_queue_lck, global_tid);

      /* only the allocating thread can deallocate the queue */
      if (taskq_thunk != NULL) {
        __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
      }

      KE_TRACE(
          10,
          ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n",
           global_tid));

      return;
    }

    // Outermost Queue: steal work from descendants until all tasks are finished

    KMP_INIT_YIELD(spins);

    while (!__kmp_taskq_tasks_finished(queue)) {
      thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

      if (thunk != NULL) {
        KF_TRACE(50,
                 ("Stole thunk: %p in descendant queue: %p while waiting in "
                  "queue: %p (%d)\n",
                  thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

      KMP_YIELD_WHEN(thunk == NULL, spins);
    }

    /* Need this barrier to prevent destruction of queue before threads have all
     * executed above code */
    /* This may need to be done earlier when NOWAIT is implemented for the
     * outermost level */

    if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
      /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */
      /* for right now, everybody waits, and the master thread destroys the */
      /* remaining queues. */

      __kmp_remove_all_child_taskq(tq, global_tid, queue);

      /* Now destroy the root queue */
      KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n",
                     global_tid, queue));
      KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

#ifdef KMP_DEBUG
      /* the root queue entry */
      KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) &&
                       (queue->tq_next_child == NULL));

      /* children must all be gone by now because of barrier above */
      KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);

      for (i = 0; i < nproc; i++) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
      }

      for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
           i++, thunk = thunk->th.th_next_free)
        ;

      KMP_DEBUG_ASSERT(i ==
                       queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));

      for (i = 0; i < nproc; i++) {
        KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]);
      }
#endif
      /* unlink the root queue entry */
      tq->tq_root = NULL;

      /* release storage for root queue entry */
      KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue,
                    global_tid));

      queue->tq_flags |= TQF_DEALLOCATED;
      __kmp_free_taskq(tq, queue, in_parallel, global_tid);

      KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));

      /* release the workers now that the data structures are up to date */
      __kmp_end_split_barrier(bs_plain_barrier, global_tid);
    }

    th = __kmp_threads[global_tid];

    /* Reset ORDERED SECTION to parallel version */
    th->th.th_dispatch->th_deo_fcn = 0;

    /* Reset ORDERED SECTION to parallel version */
    th->th.th_dispatch->th_dxo_fcn = 0;
  } else {
    /* in serial execution context, dequeue the last task */
    /* and execute it, if there were any tasks encountered */

    if (queue->tq_nfull > 0) {
      KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

      thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

      if (queue->tq_flags & TQF_IS_LAST_TASK) {
        /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */
        /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
        /* instrumentation does copy-out. */

        /* no need for test_then_or call since already locked */
        thunk->th_flags |= TQF_IS_LAST_TASK;
      }

      KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
                    thunk, queue));

      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
    }

    // destroy the unattached serial queue now that there is no more work to do
    KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n",
                   queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

#ifdef KMP_DEBUG
    i = 0;
    for (thunk = queue->tq_free_thunks; thunk != NULL;
         thunk = thunk->th.th_next_free)
      ++i;
    KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1);
#endif
    /* release storage for unattached serial queue */
    KF_TRACE(50,
             ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));

    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_free_taskq(tq, queue, in_parallel, global_tid);
  }

  KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid));
}

/* Enqueues a task for thunk previously created by __kmpc_task_buffer. */
/* Returns nonzero if just filled up queue */

kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) {
  kmp_int32 ret;
  kmpc_task_queue_t *queue;
  int in_parallel;
  kmp_taskq_t *tq;

  KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid));

  KMP_DEBUG_ASSERT(!(thunk->th_flags &
                     TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
    thunk->th_tasknum = ++queue->tq_tasknum_queuing;

  /* For serial execution dequeue the preceding task and execute it, if one
   * exists */
  /* This cannot be the last task. That one is handled in __kmpc_end_taskq */

  if (!in_parallel && queue->tq_nfull > 0) {
    kmpc_thunk_t *prev_thunk;

    KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

    prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

    KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
                  prev_thunk, queue));

    __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel);
  }

  /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private
     variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the
     task queue is not full and allocates a thunk (which is then passed to
     __kmpc_task()). So, the enqueue below should never fail due to a full
     queue. */

  KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));

  ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel);

  KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

  KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid));

  return ret;
}

/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */
/* this should never be called unless in a parallel context */

void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk,
                       kmp_int32 status) {
  kmpc_task_queue_t *queue;
  kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  int tid = __kmp_tid_from_gtid(global_tid);

  KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid));
  KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));

  queue = thunk->th.th_shareds->sv_queue;

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  /* thunk->th_task is the taskq_task */
  KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK);

  /* not supposed to call __kmpc_taskq_task if it's already enqueued */
  KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL);

  /* dequeue taskq thunk from curr_thunk stack */
  tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
  thunk->th_encl_thunk = NULL;

  KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));

  thunk->th_status = status;

  // Flush thunk->th_status before taskq_task enqueued to avoid race condition
  KMP_MB();

  /* enqueue taskq_task in thunk into special slot in queue */
  /* GEH - probably don't need to lock taskq slot since only one */
  /* thread enqueues & already a lock set at dequeue point */

  queue->tq_taskq_slot = thunk;

  KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid));
}

/* ends a taskq_task; done generating tasks */

void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
                           kmpc_thunk_t *thunk) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  int in_parallel;
  int tid;

  KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
  tid = __kmp_tid_from_gtid(global_tid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  if (in_parallel) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

    KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_ALL_TASKS_QUEUED);
#else
    {
      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

      // Make sure data structures are in consistent state before querying them
      // Seems to work without this for digital/alpha, needed for IBM/RS6000
      KMP_MB();

      queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
      __kmp_release_lock(&queue->tq_queue_lck, global_tid);
    }
#endif
  }

  if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
    /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in
       the queue if TQF_IS_LASTPRIVATE so we can positively identify that last
       task and run it with its TQF_IS_LAST_TASK bit turned on in th_flags.
       When __kmpc_end_taskq_task() is called we are done generating all the
       tasks, so we know the last one in the queue is the lastprivate task.
       Mark the queue as having gotten to this state via tq_flags &
       TQF_IS_LAST_TASK; when that task actually executes mark it via th_flags &
       TQF_IS_LAST_TASK (this th_flags bit signals the instrumented code to do
       copy-outs after execution). */
    if (!in_parallel) {
      /* No synchronization needed for serial context */
      queue->tq_flags |= TQF_IS_LAST_TASK;
    } else {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

      KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_IS_LAST_TASK);
#else
      {
        __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

        // Make sure data structures are in consistent state before querying them
        // Seems to work without this for digital/alpha, needed for IBM/RS6000
        KMP_MB();

        queue->tq_flags |= TQF_IS_LAST_TASK;
        __kmp_release_lock(&queue->tq_queue_lck, global_tid);
      }
#endif
      /* to prevent race condition where last task is dequeued but */
      /* flag isn't visible yet (not sure about this) */
      KMP_MB();
    }
  }

  /* dequeue taskq thunk from curr_thunk stack */
  if (in_parallel) {
    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  }

  KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
}
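
/* Taken together with __kmpc_taskq_task() above: the task-generating
   (TQF_TASKQ_TASK) thunk created by __kmpc_taskq() builds regular tasks with
   __kmpc_task_buffer() and __kmpc_task(), can be re-enqueued into the queue's
   dedicated tq_taskq_slot via __kmpc_taskq_task(), and signals the end of
   task generation with __kmpc_end_taskq_task(), which sets
   TQF_ALL_TASKS_QUEUED (and arranges TQF_IS_LAST_TASK handling for
   lastprivate). */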

/* returns thunk for a regular task based on taskq_thunk */
/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */

kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid,
                                 kmpc_thunk_t *taskq_thunk, kmpc_task_t task) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *new_thunk;
  int in_parallel;

  KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid));

  KMP_DEBUG_ASSERT(
      taskq_thunk->th_flags &
      TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = taskq_thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private
     variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the
     task queue is not full and allocates a thunk (which is then passed to
     __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be
     the next to be enqueued in __kmpc_task(). */

  new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid);
  new_thunk->th.th_shareds = (kmpc_shared_vars_t *)queue->tq_shareds[0].ai_data;
  new_thunk->th_encl_thunk = NULL;
  new_thunk->th_task = task;

  /* GEH - shouldn't need to lock the read of tq_flags here */
  new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;

  new_thunk->th_status = 0;

  KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK));

  KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid));

  KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid));

  return new_thunk;
}
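
/* Rough sketch of the per-task instrumentation sequence described in the
   comments above (my_task_fn is a placeholder, not a real identifier):

     kmpc_thunk_t *t = __kmpc_task_buffer(loc, gtid, taskq_thunk, my_task_fn);
     // ... initialize the task's private variables inside *t ...
     __kmpc_task(loc, gtid, t); // returns nonzero if this just filled the queue

   __kmpc_task_buffer() only allocates and fills in the thunk; the actual
   enqueue happens in __kmpc_task(). */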