/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
    "5.0 (201611)";
#elif OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

#if KMP_USE_MONITOR
kmp_info_t __kmp_monitor;
#endif

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

  /* if we haven't been assigned a gtid, then return the code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

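// Illustrative sketch only (not part of the runtime, kept compiled out): the
// lookup above maps an address on the current stack to a registered thread by
// a containment test -- because stacks grow downward on the supported
// targets, the address must lie within [stack_base - stack_size, stack_base].
// The hypothetical helper below just restates that test in isolation.
#if 0
static int __kmp_example_addr_to_gtid(char *addr) { // hypothetical helper
  for (int i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i]);
    if (!thr)
      continue;
    char *base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
    size_t size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    if (addr <= base && (size_t)(base - addr) <= size)
      return i; // addr falls inside this thread's stack window
  }
  return KMP_GTID_DNE;
}
#endif
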
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS by default abort() causes a pop-up error box, which stalls
       nightly testing. Unfortunately, we cannot reliably suppress pop-up error
       boxes. _set_abort_behavior() works well, but this function is not
       available in VS7 (this is not a problem for DLL, but it is a problem for
       static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
       help, at least in some versions of MS C RTL.

       It seems the following sequence is the only way to simulate abort() and
       avoid the pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

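// Illustrative sketch only (compiled out, not used by the runtime): on hosts
// whose C runtime does provide _set_abort_behavior(), the pop-up mentioned in
// the comment above could instead be suppressed before calling abort(). The
// helper name below is hypothetical and the availability of the CRT call is
// an assumption; the code above deliberately avoids that dependency by
// raising SIGABRT and exiting directly.
#if 0
#include <stdlib.h> // assumed host CRT header declaring _set_abort_behavior()
static void __kmp_example_quiet_abort(void) { // hypothetical helper
  // Clear both the abort message and the fault-reporting behavior.
  _set_abort_behavior(0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT);
  abort();
}
#endif
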
void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {
#if OMP_50_ENABLED
  __kmp_init_memkind();
#endif
}
static void __kmp_fini_allocator() {
#if OMP_50_ENABLED
  __kmp_fini_memkind();
#endif
}

/* ------------------------------------------------------------------------ */

#if KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one
  // calling ProcessExit or FreeLibrary). So, it might be safe to access the
  // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some
  // threads can be still alive here, although being about to be terminated. The
  // threads in the array with ds_thread==0 are most suspicious. Actually, it
  // may not be safe to access __kmp_threads[] at all.

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      // lpReserved == NULL when FreeLibrary() was called,
      // lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      // - the main function was compiled by an alternative compiler;
      // - the main function was compiled by icl but without /Qopenmp
      //   (application with plugins);
      // - application terminates by calling C exit(), Fortran CALL EXIT() or
      //   Fortran STOP.
      // - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
                     // KMP_LIBRARY=throughput
}

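// Usage sketch (hypothetical, not called anywhere in this file): the library
// mode is encoded in the low bit of the initial yield count, so a caller that
// already holds __kmp_initz_lock could temporarily force turnaround mode and
// later restore the previous setting:
//   int old = __kmp_change_library(TRUE); // odd init count => turnaround
//   /* ... latency-sensitive section ... */
//   __kmp_change_library(old);            // restore the prior mode
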
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

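// Taken together, __kmp_parallel_deo()/__kmp_parallel_dxo() above implement a
// simple ticket hand-off for the ordered construct (when
// BUILD_PARALLEL_ORDERED is defined): each thread waits until
// t_ordered.dt.t_value equals its own tid, runs the ordered region, then
// publishes (tid + 1) % nproc so the next thread in the team may proceed.
// In outline:
//   deo: KMP_WAIT_YIELD(&t_value, my_tid, KMP_EQ, NULL); // wait for my turn
//   dxo: t_value = (my_tid + 1) % nproc;                 // pass the baton
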
/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

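// Sketch of the single-claim protocol above: every thread keeps a private
// count of the single/workshare constructs it has encountered
// (th_local.this_construct), and the team keeps t_construct, the count of
// constructs already claimed. The first thread whose atomic compare-and-store
// on t_construct succeeds owns the single block; the others observe the
// updated counter and skip it. Conceptually:
//   ++my_count;
//   if (team->t_construct == old &&
//       CAS(&team->t_construct, old, my_count))
//     status = 1; // this thread executes the single region
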
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}

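// In outline, __kmp_reserve_threads() clamps the request in stages. For the
// device limit check above the arithmetic amounts to (informal restatement,
// not additional runtime code):
//   already_using = __kmp_nth - (r_active ? 1 : hot_team_nproc);
//   if (already_using + new_nthreads > __kmp_max_nth)
//     new_nthreads = max(1, __kmp_max_nth - already_using);
// where the (r_active ? 1 : hot_team_nproc) term discounts the master / hot
// team threads that are re-used rather than newly created. Analogous clamps
// follow for __kmp_cg_max_nth (OMP_THREAD_LIMIT) and for the current capacity
// of the __kmp_threads[] array.
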
/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked on that
   earlier within the forkjoin critical section. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
                   // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
                 // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
                 // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

#if OMP_50_ENABLED
  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
#endif

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We try to avoid unnecessary writes to the relevant cache line in the
// team structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // during the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

Jonathan Peyton30419822017-05-12 18:01:32 +00001176static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1177 int realloc); // forward declaration
Jim Cownie5e8470a2013-09-27 10:38:44 +00001178
Jonathan Peyton30419822017-05-12 18:01:32 +00001179/* Run a parallel region that has been serialized, so it runs only in a team of
1180   the single master thread. */
1181void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1182 kmp_info_t *this_thr;
1183 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001184
Jonathan Peyton30419822017-05-12 18:01:32 +00001185 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001186
Jonathan Peyton30419822017-05-12 18:01:32 +00001187 /* Skip all this code for autopar serialized loops since it results in
1188 unacceptable overhead */
1189 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1190 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001191
Jonathan Peyton30419822017-05-12 18:01:32 +00001192 if (!TCR_4(__kmp_init_parallel))
1193 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001194
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001195#if OMP_50_ENABLED
1196 __kmp_resume_if_soft_paused();
1197#endif
1198
Jonathan Peyton30419822017-05-12 18:01:32 +00001199 this_thr = __kmp_threads[global_tid];
1200 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001201
Jonathan Peyton30419822017-05-12 18:01:32 +00001202 /* utilize the serialized team held by this thread */
1203 KMP_DEBUG_ASSERT(serial_team);
1204 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001205
Jonathan Peyton30419822017-05-12 18:01:32 +00001206 if (__kmp_tasking_mode != tskm_immediate_exec) {
1207 KMP_DEBUG_ASSERT(
1208 this_thr->th.th_task_team ==
1209 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1210 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1211 NULL);
1212 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1213 "team %p, new task_team = NULL\n",
1214 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1215 this_thr->th.th_task_team = NULL;
1216 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001217
1218#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001219 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1220 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1221 proc_bind = proc_bind_false;
1222 } else if (proc_bind == proc_bind_default) {
1223 // No proc_bind clause was specified, so use the current value
1224 // of proc-bind-var for this parallel region.
1225 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1226 }
1227 // Reset for next parallel region
1228 this_thr->th.th_set_proc_bind = proc_bind_default;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001229#endif /* OMP_40_ENABLED */
1230
Joachim Protze82e94a52017-11-01 10:08:30 +00001231#if OMPT_SUPPORT
Jonathan Peyton3574f282018-10-04 14:57:04 +00001232 ompt_data_t ompt_parallel_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001233 ompt_data_t *implicit_task_data;
1234 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1235 if (ompt_enabled.enabled &&
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001236 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
Joachim Protze82e94a52017-11-01 10:08:30 +00001237
1238 ompt_task_info_t *parent_task_info;
1239 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1240
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001241 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001242 if (ompt_enabled.ompt_callback_parallel_begin) {
1243 int team_size = 1;
1244
1245 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1246 &(parent_task_info->task_data), &(parent_task_info->frame),
Joachim Protze489cdb72018-09-10 14:34:54 +00001247 &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
1248 codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001249 }
1250 }
1251#endif // OMPT_SUPPORT
1252
Jonathan Peyton30419822017-05-12 18:01:32 +00001253 if (this_thr->th.th_team != serial_team) {
1254 // Nested level will be an index in the nested nthreads array
1255 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001256
Jonathan Peyton30419822017-05-12 18:01:32 +00001257 if (serial_team->t.t_serialized) {
1258 /* this serial team was already used
1259         TODO increase performance by making these locks more specific */
1260 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001261
Jonathan Peyton30419822017-05-12 18:01:32 +00001262 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001263
Jonathan Peyton30419822017-05-12 18:01:32 +00001264 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001265#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001266 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001267#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001268#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001269 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001270#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001271 &this_thr->th.th_current_task->td_icvs,
1272 0 USE_NESTED_HOT_ARG(NULL));
1273 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1274 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001275
Jonathan Peyton30419822017-05-12 18:01:32 +00001276 /* setup new serialized team and install it */
1277 new_team->t.t_threads[0] = this_thr;
1278 new_team->t.t_parent = this_thr->th.th_team;
1279 serial_team = new_team;
1280 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001281
Jonathan Peyton30419822017-05-12 18:01:32 +00001282 KF_TRACE(
1283 10,
1284 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1285 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001286
Jonathan Peyton30419822017-05-12 18:01:32 +00001287 /* TODO the above breaks the requirement that if we run out of resources,
1288 then we can still guarantee that serialized teams are ok, since we may
1289 need to allocate a new one */
1290 } else {
1291 KF_TRACE(
1292 10,
1293 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1294 global_tid, serial_team));
1295 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001296
Jonathan Peyton30419822017-05-12 18:01:32 +00001297 /* we have to initialize this serial team */
1298 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1299 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1300 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1301 serial_team->t.t_ident = loc;
1302 serial_team->t.t_serialized = 1;
1303 serial_team->t.t_nproc = 1;
1304 serial_team->t.t_parent = this_thr->th.th_team;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00001305 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00001306 this_thr->th.th_team = serial_team;
1307 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001308
Jonathan Peyton30419822017-05-12 18:01:32 +00001309    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1310 this_thr->th.th_current_task));
1311 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1312 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001313
Jonathan Peyton30419822017-05-12 18:01:32 +00001314 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001315
Jonathan Peyton30419822017-05-12 18:01:32 +00001316 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1317 implicit task for each serialized task represented by
1318 team->t.t_serialized? */
1319 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1320 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001321
Jonathan Peyton30419822017-05-12 18:01:32 +00001322 // Thread value exists in the nested nthreads array for the next nested
1323 // level
1324 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1325 this_thr->th.th_current_task->td_icvs.nproc =
1326 __kmp_nested_nth.nth[level + 1];
1327 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001328
1329#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001330 if (__kmp_nested_proc_bind.used &&
1331 (level + 1 < __kmp_nested_proc_bind.used)) {
1332 this_thr->th.th_current_task->td_icvs.proc_bind =
1333 __kmp_nested_proc_bind.bind_types[level + 1];
1334 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001335#endif /* OMP_40_ENABLED */
1336
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001337#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001338 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001339#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001340 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001341
Jonathan Peyton30419822017-05-12 18:01:32 +00001342 /* set thread cache values */
1343 this_thr->th.th_team_nproc = 1;
1344 this_thr->th.th_team_master = this_thr;
1345 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001346
Jonathan Peyton30419822017-05-12 18:01:32 +00001347 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1348 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001349#if OMP_50_ENABLED
1350 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1351#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001352
Jonathan Peyton30419822017-05-12 18:01:32 +00001353 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001354
Jonathan Peyton30419822017-05-12 18:01:32 +00001355 /* check if we need to allocate dispatch buffers stack */
1356 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1357 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1358 serial_team->t.t_dispatch->th_disp_buffer =
1359 (dispatch_private_info_t *)__kmp_allocate(
1360 sizeof(dispatch_private_info_t));
1361 }
1362 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001363
Jonathan Peyton30419822017-05-12 18:01:32 +00001364 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001365
Jonathan Peyton30419822017-05-12 18:01:32 +00001366 } else {
1367 /* this serialized team is already being used,
1368 * that's fine, just add another nested level */
1369 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1370 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1371 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1372 ++serial_team->t.t_serialized;
1373 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001374
Jonathan Peyton30419822017-05-12 18:01:32 +00001375 // Nested level will be an index in the nested nthreads array
1376 int level = this_thr->th.th_team->t.t_level;
1377 // Thread value exists in the nested nthreads array for the next nested
1378 // level
1379 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1380 this_thr->th.th_current_task->td_icvs.nproc =
1381 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001382 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001383 serial_team->t.t_level++;
1384 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1385 "of serial team %p to %d\n",
1386 global_tid, serial_team, serial_team->t.t_level));
1387
1388 /* allocate/push dispatch buffers stack */
1389 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1390 {
1391 dispatch_private_info_t *disp_buffer =
1392 (dispatch_private_info_t *)__kmp_allocate(
1393 sizeof(dispatch_private_info_t));
1394 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1395 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1396 }
1397 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1398
1399 KMP_MB();
1400 }
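// Illustrative sketch (not compiled in, shown as file-scope helpers): the
// per-nesting-level dispatch buffers pushed above form a simple LIFO list --
// every additional serialized level gets a fresh buffer and the matching end
// of the region pops it. Types and helpers below are local to this sketch.
#if 0
struct disp_buf {
  disp_buf *next;
  // ... per-level loop-dispatch state would live here ...
};

static void push_buf(disp_buf *&head) {
  disp_buf *b = new disp_buf(); // the runtime uses __kmp_allocate instead
  b->next = head;
  head = b;
}

static void pop_buf(disp_buf *&head) {
  disp_buf *b = head;
  head = b->next;
  delete b;
}
#endif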
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001401#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001402 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001403#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001404
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001405#if OMP_50_ENABLED
1406 // Perform the display affinity functionality for
1407 // serialized parallel regions
1408 if (__kmp_display_affinity) {
1409 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1410 this_thr->th.th_prev_num_threads != 1) {
1411 // NULL means use the affinity-format-var ICV
1412 __kmp_aux_display_affinity(global_tid, NULL);
1413 this_thr->th.th_prev_level = serial_team->t.t_level;
1414 this_thr->th.th_prev_num_threads = 1;
1415 }
1416 }
1417#endif
1418
Jonathan Peyton30419822017-05-12 18:01:32 +00001419 if (__kmp_env_consistency_check)
1420 __kmp_push_parallel(global_tid, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001421#if OMPT_SUPPORT
1422 serial_team->t.ompt_team_info.master_return_address = codeptr;
1423 if (ompt_enabled.enabled &&
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001424 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1425 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001426
1427 ompt_lw_taskteam_t lw_taskteam;
1428 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1429 &ompt_parallel_data, codeptr);
1430
1431 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1432    // don't use lw_taskteam after linking. content was swapped
1433
1434 /* OMPT implicit task begin */
1435 implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
1436 if (ompt_enabled.ompt_callback_implicit_task) {
1437 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1438 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
Joachim Protze2b46d302019-01-15 15:36:53 +00001439 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001440 OMPT_CUR_TASK_INFO(this_thr)
1441 ->thread_num = __kmp_tid_from_gtid(global_tid);
Joachim Protze82e94a52017-11-01 10:08:30 +00001442 }
1443
1444 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001445 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1446 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001447 }
1448#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001449}
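// Illustrative sketch (not compiled in), assuming an OpenMP 5.0 runtime: how
// the display-affinity handling above surfaces in a standalone user program.
// With OMP_DISPLAY_AFFINITY=TRUE set in the environment, entering a parallel
// region (even a serialized one) prints one affinity line per thread; the
// format string below is only an example.
#if 0
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_affinity_format("thread %{thread_num} bound to %{thread_affinity}");
#pragma omp parallel num_threads(2)
  { std::printf("hello from %d\n", omp_get_thread_num()); }
  return 0;
}
#endif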
Jim Cownie181b4bb2013-12-23 17:28:57 +00001450
Jim Cownie5e8470a2013-09-27 10:38:44 +00001451/* most of the work for a fork */
1452/* return true if we really went parallel, false if serialized */
Jonathan Peyton30419822017-05-12 18:01:32 +00001453int __kmp_fork_call(ident_t *loc, int gtid,
1454 enum fork_context_e call_context, // Intel, GNU, ...
Joachim Protze82e94a52017-11-01 10:08:30 +00001455 kmp_int32 argc, microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001456/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001457#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001458 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001459#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001460 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001461#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001462 ) {
1463 void **argv;
1464 int i;
1465 int master_tid;
1466 int master_this_cons;
1467 kmp_team_t *team;
1468 kmp_team_t *parent_team;
1469 kmp_info_t *master_th;
1470 kmp_root_t *root;
1471 int nthreads;
1472 int master_active;
1473 int master_set_numthreads;
1474 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001475#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001476 int active_level;
1477 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001478#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001479#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001480 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001481#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001482 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001483 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001484 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001485
Jonathan Peyton30419822017-05-12 18:01:32 +00001486 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1487 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1488 /* Some systems prefer the stack for the root thread(s) to start with */
1489 /* some gap from the parent stack to prevent false sharing. */
1490 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1491 /* These 2 lines below are so this does not get optimized out */
1492 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1493 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001494 }
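// Illustrative sketch (not compiled in, shown as file-scope data): the same
// false-sharing concern the stack gap above addresses, applied to static
// per-thread data -- keep each thread's hot data on its own cache line so one
// thread's writes do not keep invalidating another's. The 64-byte line size
// is an assumption.
#if 0
struct alignas(64) per_thread_counter {
  long value; // padded out to a full cache line by the alignment above
};
static per_thread_counter counters[8]; // one cache line per thread
#endif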
Jim Cownie5e8470a2013-09-27 10:38:44 +00001495
1496 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001497 KMP_DEBUG_ASSERT(
1498 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1499 if (!TCR_4(__kmp_init_parallel))
1500 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001501
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001502#if OMP_50_ENABLED
1503 __kmp_resume_if_soft_paused();
1504#endif
1505
Jim Cownie5e8470a2013-09-27 10:38:44 +00001506 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001507 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1508 // shutdown
1509 parent_team = master_th->th.th_team;
1510 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001511 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001512 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001513 master_active = root->r.r_active;
1514 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001515
1516#if OMPT_SUPPORT
Jonathan Peyton3574f282018-10-04 14:57:04 +00001517 ompt_data_t ompt_parallel_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001518 ompt_data_t *parent_task_data;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001519 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001520 ompt_data_t *implicit_task_data;
1521 void *return_address = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001522
Joachim Protze82e94a52017-11-01 10:08:30 +00001523 if (ompt_enabled.enabled) {
1524 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1525 NULL, NULL);
1526 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001527 }
1528#endif
1529
Jim Cownie5e8470a2013-09-27 10:38:44 +00001530 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001531 level = parent_team->t.t_level;
1532 // used to launch non-serial teams even if nested is not allowed
1533 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001534#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001535 // needed to check nesting inside the teams
1536 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001537#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001538#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001539 p_hot_teams = &master_th->th.th_hot_teams;
1540 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1541 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1542 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1543 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001544 // it is either actual or not needed (when active_level > 0)
1545 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001546 }
1547#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001548
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001549#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001550 if (ompt_enabled.enabled) {
1551 if (ompt_enabled.ompt_callback_parallel_begin) {
1552 int team_size = master_set_numthreads
1553 ? master_set_numthreads
1554 : get__nproc_2(parent_team, master_tid);
1555 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1556 parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
1557 OMPT_INVOKER(call_context), return_address);
1558 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001559 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001560 }
1561#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001562
Jim Cownie5e8470a2013-09-27 10:38:44 +00001563 master_th->th.th_ident = loc;
1564
1565#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001566 if (master_th->th.th_teams_microtask && ap &&
1567 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1568 // AC: This is start of parallel that is nested inside teams construct.
1569 // The team is actual (hot), all workers are ready at the fork barrier.
1570 // No lock needed to initialize the team a bit, then free workers.
1571 parent_team->t.t_ident = loc;
1572 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1573 parent_team->t.t_argc = argc;
1574 argv = (void **)parent_team->t.t_argv;
1575 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001576/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001577#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001578 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001579#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001580 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001581#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001582 // Increment our nested depth levels, but not increase the serialization
1583 if (parent_team == master_th->th.th_serial_team) {
1584 // AC: we are in serialized parallel
1585 __kmpc_serialized_parallel(loc, gtid);
1586 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1587 // AC: need this in order enquiry functions work
1588 // correctly, will restore at join time
1589 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001590#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001591 void *dummy;
1592 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001593
Jonathan Peyton30419822017-05-12 18:01:32 +00001594 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001595
Joachim Protze82e94a52017-11-01 10:08:30 +00001596 if (ompt_enabled.enabled) {
1597 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1598 &ompt_parallel_data, return_address);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001599 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001600
Joachim Protze82e94a52017-11-01 10:08:30 +00001601 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1602          // don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001603
Jonathan Peyton30419822017-05-12 18:01:32 +00001604 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001605 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1606 if (ompt_enabled.ompt_callback_implicit_task) {
1607 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1608 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
Joachim Protze2b46d302019-01-15 15:36:53 +00001609 implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001610 OMPT_CUR_TASK_INFO(master_th)
1611 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001612 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001613
Jonathan Peyton30419822017-05-12 18:01:32 +00001614 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001615 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001616 } else {
1617 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001618 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001619#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001620
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001621 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001622 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1623 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1624 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1625#if OMPT_SUPPORT
1626 ,
1627 exit_runtime_p
1628#endif
1629 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001630 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001631
Jonathan Peyton30419822017-05-12 18:01:32 +00001632#if OMPT_SUPPORT
1633 *exit_runtime_p = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001634 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001635 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001636 if (ompt_enabled.ompt_callback_implicit_task) {
1637 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1638 ompt_scope_end, NULL, implicit_task_data, 1,
Joachim Protze2b46d302019-01-15 15:36:53 +00001639 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00001640 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001641 __ompt_lw_taskteam_unlink(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001642
Joachim Protze82e94a52017-11-01 10:08:30 +00001643 if (ompt_enabled.ompt_callback_parallel_end) {
1644 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1645 OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
1646 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001647 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001648 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001649 }
1650#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001651 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001652 }
1653
1654 parent_team->t.t_pkfn = microtask;
Jonathan Peyton30419822017-05-12 18:01:32 +00001655 parent_team->t.t_invoke = invoker;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00001656 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00001657 parent_team->t.t_active_level++;
1658 parent_team->t.t_level++;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001659#if OMP_50_ENABLED
1660 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1661#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001662
1663 /* Change number of threads in the team if requested */
1664 if (master_set_numthreads) { // The parallel has num_threads clause
1665 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
1666 // AC: only can reduce number of threads dynamically, can't increase
1667 kmp_info_t **other_threads = parent_team->t.t_threads;
1668 parent_team->t.t_nproc = master_set_numthreads;
1669 for (i = 0; i < master_set_numthreads; ++i) {
1670 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1671 }
1672 // Keep extra threads hot in the team for possible next parallels
1673 }
1674 master_th->th.th_set_nproc = 0;
1675 }
1676
1677#if USE_DEBUGGER
1678 if (__kmp_debugging) { // Let debugger override number of threads.
1679 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001680 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001681 master_set_numthreads = nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001682 }
1683 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001684#endif
1685
1686 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1687 "master_th=%p, gtid=%d\n",
1688 root, parent_team, master_th, gtid));
1689 __kmp_internal_fork(loc, gtid, parent_team);
1690 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1691 "master_th=%p, gtid=%d\n",
1692 root, parent_team, master_th, gtid));
1693
1694 /* Invoke microtask for MASTER thread */
1695 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1696 parent_team->t.t_id, parent_team->t.t_pkfn));
1697
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001698 if (!parent_team->t.t_invoke(gtid)) {
1699 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jonathan Peyton30419822017-05-12 18:01:32 +00001700 }
1701 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1702 parent_team->t.t_id, parent_team->t.t_pkfn));
1703 KMP_MB(); /* Flush all pending memory write invalidates. */
1704
1705 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1706
1707 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001708 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001709#endif /* OMP_40_ENABLED */
1710
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001711#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 if (__kmp_tasking_mode != tskm_immediate_exec) {
1713 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1714 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001715 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001716#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001717
Jonathan Peyton30419822017-05-12 18:01:32 +00001718 if (parent_team->t.t_active_level >=
1719 master_th->th.th_current_task->td_icvs.max_active_levels) {
1720 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001721 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001722#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001723 int enter_teams = ((ap == NULL && active_level == 0) ||
1724 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001725#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001726 nthreads =
1727 master_set_numthreads
1728 ? master_set_numthreads
1729 : get__nproc_2(
1730 parent_team,
1731 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001732
Jonathan Peyton30419822017-05-12 18:01:32 +00001733 // Check if we need to take forkjoin lock? (no need for serialized
1734 // parallel out of teams construct). This code moved here from
1735      // __kmp_reserve_threads() to speed up nested serialized parallels.
1736 if (nthreads > 1) {
1737 if ((!get__nested(master_th) && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001738#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001739 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001740#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001741 )) ||
1742 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001743 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1744 " threads\n",
1745 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001746 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001747 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001748 }
1749 if (nthreads > 1) {
1750 /* determine how many new threads we can use */
1751 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001752 nthreads = __kmp_reserve_threads(
1753 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001754#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001755 /* AC: If we execute teams from parallel region (on host), then
1756 teams should be created but each can only have 1 thread if
1757 nesting is disabled. If teams called from serial region, then
1758 teams and their threads should be created regardless of the
1759 nesting setting. */
1760 ,
1761 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001762#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001763 );
1764 if (nthreads == 1) {
1765 // Free lock for single thread execution here; for multi-thread
1766 // execution it will be freed later after team of threads created
1767 // and initialized
1768 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001769 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001770 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001771 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001772 KMP_DEBUG_ASSERT(nthreads > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001773
Jonathan Peyton30419822017-05-12 18:01:32 +00001774 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001775 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001776
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001778 if (nthreads == 1) {
1779/* josh todo: hypothetical question: what do we do for OS X*? */
1780#if KMP_OS_LINUX && \
1781 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1782 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001783#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001784 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1785#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1786 KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001787
Jonathan Peyton30419822017-05-12 18:01:32 +00001788 KA_TRACE(20,
1789 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001790
Jonathan Peyton30419822017-05-12 18:01:32 +00001791 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001792
Jonathan Peyton30419822017-05-12 18:01:32 +00001793 if (call_context == fork_context_intel) {
1794 /* TODO this sucks, use the compiler itself to pass args! :) */
1795 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001796#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001797 if (!ap) {
1798 // revert change made in __kmpc_serialized_parallel()
1799 master_th->th.th_serial_team->t.t_level--;
1800// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001801
1802#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001803 void *dummy;
1804 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001805 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001806
Jonathan Peyton30419822017-05-12 18:01:32 +00001807 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001808
Joachim Protze82e94a52017-11-01 10:08:30 +00001809 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001810 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001811 &ompt_parallel_data, return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001812
Joachim Protze82e94a52017-11-01 10:08:30 +00001813 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1814 // don't use lw_taskteam after linking. content was swaped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001815
Joachim Protze82e94a52017-11-01 10:08:30 +00001816 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001817 exit_runtime_p = &(task_info->frame.exit_frame.ptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001818 if (ompt_enabled.ompt_callback_implicit_task) {
1819 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1820 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
Joachim Protze2b46d302019-01-15 15:36:53 +00001821 &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001822 OMPT_CUR_TASK_INFO(master_th)
1823 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001824 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001825
Jonathan Peyton30419822017-05-12 18:01:32 +00001826 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001827 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001828 } else {
1829 exit_runtime_p = &dummy;
1830 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001831#endif
1832
Jonathan Peyton30419822017-05-12 18:01:32 +00001833 {
1834 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1835 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1836 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1837 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001838#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001839 ,
1840 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001841#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001842 );
1843 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001844
1845#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001846 if (ompt_enabled.enabled) {
1847 exit_runtime_p = NULL;
1848 if (ompt_enabled.ompt_callback_implicit_task) {
1849 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1850 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze2b46d302019-01-15 15:36:53 +00001851 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00001852 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001853
Jonathan Peyton30419822017-05-12 18:01:32 +00001854 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001855 if (ompt_enabled.ompt_callback_parallel_end) {
1856 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1857 OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
1858 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001859 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001860 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001861 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001862#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001863 } else if (microtask == (microtask_t)__kmp_teams_master) {
1864 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1865 master_th->th.th_serial_team);
1866 team = master_th->th.th_team;
1867 // team->t.t_pkfn = microtask;
1868 team->t.t_invoke = invoker;
1869 __kmp_alloc_argv_entries(argc, team, TRUE);
1870 team->t.t_argc = argc;
1871 argv = (void **)team->t.t_argv;
1872 if (ap) {
1873 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001874// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001875#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001876 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001877#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001878 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001879#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001880 } else {
1881 for (i = 0; i < argc; ++i)
1882 // Get args from parent team for teams construct
1883 argv[i] = parent_team->t.t_argv[i];
1884 }
1885 // AC: revert change made in __kmpc_serialized_parallel()
1886 // because initial code in teams should have level=0
1887 team->t.t_level--;
1888 // AC: call special invoker for outer "parallel" of teams construct
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001889 invoker(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001890 } else {
1891#endif /* OMP_40_ENABLED */
1892 argv = args;
1893 for (i = argc - 1; i >= 0; --i)
1894// TODO: revert workaround for Intel(R) 64 tracker #96
1895#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1896 *argv++ = va_arg(*ap, void *);
1897#else
1898 *argv++ = va_arg(ap, void *);
1899#endif
1900 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001901
1902#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001903 void *dummy;
1904 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001905 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001906
Jonathan Peyton30419822017-05-12 18:01:32 +00001907 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001908
Joachim Protze82e94a52017-11-01 10:08:30 +00001909 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001910 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001911 &ompt_parallel_data, return_address);
1912 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1913          // don't use lw_taskteam after linking. content was swapped
1914 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001915 exit_runtime_p = &(task_info->frame.exit_frame.ptr);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001916
Jonathan Peyton30419822017-05-12 18:01:32 +00001917 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001918 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1919 if (ompt_enabled.ompt_callback_implicit_task) {
1920 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1921 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
Joachim Protze2b46d302019-01-15 15:36:53 +00001922 implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001923 OMPT_CUR_TASK_INFO(master_th)
1924 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001925 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001926
Jonathan Peyton30419822017-05-12 18:01:32 +00001927 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001928 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001929 } else {
1930 exit_runtime_p = &dummy;
1931 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001932#endif
1933
Jonathan Peyton30419822017-05-12 18:01:32 +00001934 {
1935 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1936 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1937 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001938#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001939 ,
1940 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001941#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001942 );
1943 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001944
1945#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001946 if (ompt_enabled.enabled) {
1947 *exit_runtime_p = NULL;
1948 if (ompt_enabled.ompt_callback_implicit_task) {
1949 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1950 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze2b46d302019-01-15 15:36:53 +00001951 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00001952 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001953
Joachim Protze82e94a52017-11-01 10:08:30 +00001954 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001955 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001956 if (ompt_enabled.ompt_callback_parallel_end) {
1957 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1958 &ompt_parallel_data, parent_task_data,
1959 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001960 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001961 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001962 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001963#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001964#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001966#endif /* OMP_40_ENABLED */
1967 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001968#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001969 ompt_lw_taskteam_t lwt;
1970 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1971 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001972
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001973 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001974 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1975// don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001976#endif
1977
Jonathan Peyton30419822017-05-12 18:01:32 +00001978 // we were called from GNU native code
1979 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001980 return FALSE;
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001981 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001982 KMP_ASSERT2(call_context < fork_context_last,
1983 "__kmp_fork_call: unknown fork_context parameter");
1984 }
1985
1986 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1987 KMP_MB();
1988 return FALSE;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001989 } // if (nthreads == 1)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991 // GEH: only modify the executing flag in the case when not serialized
1992 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001993 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1994 "curtask=%p, curtask_max_aclevel=%d\n",
1995 parent_team->t.t_active_level, master_th,
1996 master_th->th.th_current_task,
1997 master_th->th.th_current_task->td_icvs.max_active_levels));
1998 // TODO: GEH - cannot do this assertion because root thread not set up as
1999 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00002000 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2001 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002
2003#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002004 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005#endif /* OMP_40_ENABLED */
2006 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002007 /* Increment our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002008 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002009 }
2010
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002012 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00002013 if ((level + 1 < __kmp_nested_nth.used) &&
2014 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2015 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2016 } else {
2017 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00002018 }
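// Illustrative sketch (not compiled in, shown as a file-scope helper): the
// nested nthreads lookup just above. With OMP_NUM_THREADS=8,4,2 the nth
// array holds {8, 4, 2}; a region forking at level 0 consults entry 1
// (4 threads) for its children, level 1 consults entry 2, and deeper levels
// keep the inherited ICV. Names below are local to this sketch.
#if 0
static int nproc_for_next_level(const int *nth, int used, int level,
                                int inherited) {
  if (used && level + 1 < used && nth[level + 1] != inherited)
    return nth[level + 1];
  return 0; // 0 means "keep the inherited value; don't update the ICV"
}
#endif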
2019
2020#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002022 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00002023 kmp_proc_bind_t proc_bind_icv =
2024 proc_bind_default; // proc_bind_default means don't update
2025 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2026 proc_bind = proc_bind_false;
2027 } else {
2028 if (proc_bind == proc_bind_default) {
2029 // No proc_bind clause specified; use current proc-bind-var for this
2030 // parallel region
2031 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2032 }
2033 /* else: The proc_bind policy was specified explicitly on parallel clause.
2034 This overrides proc-bind-var for this parallel region, but does not
2035 change proc-bind-var. */
2036 // Figure the value of proc-bind-var for the child threads.
2037 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2038 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2039 master_th->th.th_current_task->td_icvs.proc_bind)) {
2040 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2041 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042 }
2043
Jim Cownie5e8470a2013-09-27 10:38:44 +00002044 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00002045 master_th->th.th_set_proc_bind = proc_bind_default;
2046#endif /* OMP_40_ENABLED */
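// Illustrative sketch (not compiled in, shown as a file-scope helper): how a
// nested OMP_PROC_BIND list feeds the logic above. With
// OMP_PROC_BIND=spread,close the bind_types array holds {spread, close}; a
// region with no proc_bind clause inherits proc-bind-var, and its children
// pick up entry level+1 when one exists. Names below are local to the sketch.
#if 0
enum bind_t { bind_false, bind_spread, bind_close };

struct nested_bind_list {
  bind_t types[8]; // filled in from OMP_PROC_BIND at startup
  int used;        // number of valid entries
};

static bind_t bind_for_children(const nested_bind_list &nb, int level,
                                bind_t inherited) {
  if (nb.used && level + 1 < nb.used && nb.types[level + 1] != inherited)
    return nb.types[level + 1]; // the next level has its own setting
  return inherited;             // otherwise keep the inherited value
}
#endif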
2047
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002048 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002049#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002050 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002051#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002052 ) {
2053 kmp_internal_control_t new_icvs;
2054 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2055 new_icvs.next = NULL;
2056 if (nthreads_icv > 0) {
2057 new_icvs.nproc = nthreads_icv;
2058 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002059
2060#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002061 if (proc_bind_icv != proc_bind_default) {
2062 new_icvs.proc_bind = proc_bind_icv;
2063 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064#endif /* OMP_40_ENABLED */
2065
Jonathan Peyton30419822017-05-12 18:01:32 +00002066 /* allocate a new parallel team */
2067 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2068 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002069#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002070 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002071#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002072#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002073 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002074#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002075 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002076 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002077 /* allocate a new parallel team */
2078 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2079 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002080#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002081 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002082#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002083#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002084 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002085#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002086 &master_th->th.th_current_task->td_icvs,
2087 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002088 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002089 KF_TRACE(
2090 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091
2092 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002093 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2094 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2095 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2096 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2097 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002098#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002099 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2100 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002101#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002102 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2103// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002105 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002106#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002107 int new_level = parent_team->t.t_level + 1;
2108 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2109 new_level = parent_team->t.t_active_level + 1;
2110 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002111#if OMP_40_ENABLED
2112 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002113 // AC: Do not increase parallel level at start of the teams construct
2114 int new_level = parent_team->t.t_level;
2115 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2116 new_level = parent_team->t.t_active_level;
2117 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002118 }
2119#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002120 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002121 // set master's schedule as new run-time schedule
2122 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002123
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002124#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002125 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002126#endif
Jonathan Peyton92ca6182018-09-07 18:25:49 +00002127#if OMP_50_ENABLED
2128 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2129#endif
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002130
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002131 // Update the floating point rounding in the team if required.
2132 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002133
Jonathan Peyton30419822017-05-12 18:01:32 +00002134 if (__kmp_tasking_mode != tskm_immediate_exec) {
2135    // Set master's task team to team's task team. Unless this is a hot
2136    // team, it should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002137 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2138 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002139 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2140 "%p, new task_team %p / team %p\n",
2141 __kmp_gtid_from_thread(master_th),
2142 master_th->th.th_task_team, parent_team,
2143 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002144
Jonathan Peyton30419822017-05-12 18:01:32 +00002145 if (active_level || master_th->th.th_task_team) {
2146 // Take a memo of master's task_state
2147 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2148 if (master_th->th.th_task_state_top >=
2149 master_th->th.th_task_state_stack_sz) { // increase size
2150 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2151 kmp_uint8 *old_stack, *new_stack;
2152 kmp_uint32 i;
2153 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2154 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2155 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2156 }
2157 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2158 ++i) { // zero-init rest of stack
2159 new_stack[i] = 0;
2160 }
2161 old_stack = master_th->th.th_task_state_memo_stack;
2162 master_th->th.th_task_state_memo_stack = new_stack;
2163 master_th->th.th_task_state_stack_sz = new_size;
2164 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002165 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002166 // Store master's task_state on stack
2167 master_th->th
2168 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2169 master_th->th.th_task_state;
2170 master_th->th.th_task_state_top++;
2171#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonca10a762018-08-24 18:05:00 +00002172 if (master_th->th.th_hot_teams &&
Jonathan Peytonf4c07202018-11-28 20:15:11 +00002173 active_level < __kmp_hot_teams_max_level &&
Jonathan Peytonca10a762018-08-24 18:05:00 +00002174 team == master_th->th.th_hot_teams[active_level].hot_team) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00002175 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002176 master_th->th.th_task_state =
2177 master_th->th
2178 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2179 } else {
2180#endif
2181 master_th->th.th_task_state = 0;
2182#if KMP_NESTED_HOT_TEAMS
2183 }
2184#endif
2185 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002186#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002187 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2188 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002189#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002190 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002191
Jonathan Peyton30419822017-05-12 18:01:32 +00002192 KA_TRACE(
2193 20,
2194 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2195 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2196 team->t.t_nproc));
2197 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2198 (team->t.t_master_tid == 0 &&
2199 (team->t.t_parent == root->r.r_root_team ||
2200 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002201 KMP_MB();
2202
2203 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002204 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002206 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002207#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002208 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002209// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002210#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002211 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002212#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002213 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002214#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002215 KMP_CHECK_UPDATE(*argv, new_argv);
2216 argv++;
2217 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002218#if OMP_40_ENABLED
2219 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002220 for (i = 0; i < argc; ++i) {
2221 // Get args from parent team for teams construct
2222 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2223 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224 }
2225#endif /* OMP_40_ENABLED */
2226
2227 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002228 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002229 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002230 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002231
Jonathan Peyton30419822017-05-12 18:01:32 +00002232 __kmp_fork_team_threads(root, team, master_th, gtid);
2233 __kmp_setup_icv_copy(team, nthreads,
2234 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002235
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002236#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002237 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002238#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002239
Jonathan Peyton30419822017-05-12 18:01:32 +00002240 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002241
Jim Cownie5e8470a2013-09-27 10:38:44 +00002242#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002243 if (team->t.t_active_level == 1 // only report frames at level 1
2244#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002245 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002246#endif /* OMP_40_ENABLED */
2247 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002248#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002249 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2250 (__kmp_forkjoin_frames_mode == 3 ||
2251 __kmp_forkjoin_frames_mode == 1)) {
2252 kmp_uint64 tmp_time = 0;
2253 if (__itt_get_timestamp_ptr)
2254 tmp_time = __itt_get_timestamp();
2255 // Internal fork - report frame begin
2256 master_th->th.th_frame_time = tmp_time;
2257 if (__kmp_forkjoin_frames_mode == 3)
2258 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002259 } else
2260// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002261#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002262 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2263 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002264 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002265 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2266 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002267 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002268#endif /* USE_ITT_BUILD */
2269
2270 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002271 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002272 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002273 KF_TRACE(10,
2274 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2275 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002276
2277#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002278 if (__itt_stack_caller_create_ptr) {
2279 team->t.t_stack_id =
2280 __kmp_itt_stack_caller_create(); // create new stack stitching id
2281 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002282 }
2283#endif /* USE_ITT_BUILD */
2284
2285#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002286 // AC: skip __kmp_internal_fork at teams construct, let only master
2287 // threads execute
2288 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002289#endif /* OMP_40_ENABLED */
2290 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002291 __kmp_internal_fork(loc, gtid, team);
2292 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2293 "master_th=%p, gtid=%d\n",
2294 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002295 }
2296
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002297 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002298 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2299 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002300 }
2301
2302 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002303 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2304 team->t.t_id, team->t.t_pkfn));
2305 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002306
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00002307 if (!team->t.t_invoke(gtid)) {
2308 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jonathan Peyton30419822017-05-12 18:01:32 +00002309 }
2310 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2311 team->t.t_id, team->t.t_pkfn));
2312 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002313
Jonathan Peyton30419822017-05-12 18:01:32 +00002314 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002315
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002316#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002317 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002318 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002319 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002320#endif
2321
Jonathan Peyton30419822017-05-12 18:01:32 +00002322 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002323}
2324
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002325#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002326static inline void __kmp_join_restore_state(kmp_info_t *thread,
2327 kmp_team_t *team) {
2328 // restore state outside the region
2329 thread->th.ompt_thread_info.state =
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002330 ((team->t.t_serialized) ? ompt_state_work_serial
2331 : ompt_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002332}
2333
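// Fires the OMPT parallel_end callback (when registered), clears the task's
// enter frame, and restores the thread's OMPT state to that of the enclosing
// region.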
Joachim Protze82e94a52017-11-01 10:08:30 +00002334static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2335 kmp_team_t *team, ompt_data_t *parallel_data,
2336 fork_context_e fork_context, void *codeptr) {
2337 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2338 if (ompt_enabled.ompt_callback_parallel_end) {
2339 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2340 parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
2341 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002342 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002343
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002344 task_info->frame.enter_frame = ompt_data_none;
Jonathan Peyton30419822017-05-12 18:01:32 +00002345 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002346}
2347#endif
2348
Jonathan Peyton30419822017-05-12 18:01:32 +00002349void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002350#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002351 ,
2352 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002353#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002355 ,
2356 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002357#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002358 ) {
2359 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2360 kmp_team_t *team;
2361 kmp_team_t *parent_team;
2362 kmp_info_t *master_th;
2363 kmp_root_t *root;
2364 int master_active;
2365 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002366
Jonathan Peyton30419822017-05-12 18:01:32 +00002367 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002368
Jonathan Peyton30419822017-05-12 18:01:32 +00002369 /* setup current data */
2370 master_th = __kmp_threads[gtid];
2371 root = master_th->th.th_root;
2372 team = master_th->th.th_team;
2373 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002374
Jonathan Peyton30419822017-05-12 18:01:32 +00002375 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002376
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002377#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002378 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002379 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002380 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002381#endif
2382
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002383#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002384 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2385 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2386 "th_task_team = %p\n",
2387 __kmp_gtid_from_thread(master_th), team,
2388 team->t.t_task_team[master_th->th.th_task_state],
2389 master_th->th.th_task_team));
2390 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2391 team->t.t_task_team[master_th->th.th_task_state]);
2392 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002393#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002394
Jonathan Peyton30419822017-05-12 18:01:32 +00002395 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002397 if (master_th->th.th_teams_microtask) {
2398 // We are in teams construct
2399 int level = team->t.t_level;
2400 int tlevel = master_th->th.th_teams_level;
2401 if (level == tlevel) {
2402 // AC: we haven't incremented it earlier at start of teams construct,
2403 // so do it here - at the end of teams construct
2404 team->t.t_level++;
2405 } else if (level == tlevel + 1) {
2406 // AC: we are exiting parallel inside teams, need to increment
2407 // serialization in order to restore it in the next call to
2408 // __kmpc_end_serialized_parallel
2409 team->t.t_serialized++;
2410 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002411 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002412#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002413 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002414
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002415#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002416 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002417 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002418 }
2419#endif
2420
Jonathan Peyton30419822017-05-12 18:01:32 +00002421 return;
2422 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002423
Jonathan Peyton30419822017-05-12 18:01:32 +00002424 master_active = team->t.t_master_active;
2425
2426#if OMP_40_ENABLED
2427 if (!exit_teams)
2428#endif /* OMP_40_ENABLED */
2429 {
2430 // AC: No barrier for internal teams at exit from teams construct.
2431    // But there is a barrier for the external team (the league).
2432 __kmp_internal_join(loc, gtid, team);
2433 }
2434#if OMP_40_ENABLED
2435 else {
2436 master_th->th.th_task_state =
2437 0; // AC: no tasking in teams (out of any parallel)
2438 }
2439#endif /* OMP_40_ENABLED */
2440
2441 KMP_MB();
2442
2443#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002444 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2445 void *codeptr = team->t.ompt_team_info.master_return_address;
Jonathan Peyton30419822017-05-12 18:01:32 +00002446#endif
2447
2448#if USE_ITT_BUILD
2449 if (__itt_stack_caller_create_ptr) {
2450 __kmp_itt_stack_caller_destroy(
2451 (__itt_caller)team->t
2452 .t_stack_id); // destroy the stack stitching id after join barrier
2453 }
2454
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002455 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002456 if (team->t.t_active_level == 1
2457#if OMP_40_ENABLED
2458 && !master_th->th.th_teams_microtask /* not in teams construct */
2459#endif /* OMP_40_ENABLED */
2460 ) {
2461 master_th->th.th_ident = loc;
2462 // only one notification scheme (either "submit" or "forking/joined", not
2463 // both)
2464 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2465 __kmp_forkjoin_frames_mode == 3)
2466 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2467 master_th->th.th_frame_time, 0, loc,
2468 master_th->th.th_team_nproc, 1);
2469 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2470 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2471 __kmp_itt_region_joined(gtid);
2472 } // active_level == 1
2473#endif /* USE_ITT_BUILD */
2474
2475#if OMP_40_ENABLED
2476 if (master_th->th.th_teams_microtask && !exit_teams &&
2477 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2478 team->t.t_level == master_th->th.th_teams_level + 1) {
2479    // AC: We need to leave the team structure intact at the end of a parallel
2480    // region inside the teams construct, so that the same (hot) team can be
2481    // reused by the next parallel region; only the nesting levels are adjusted.
2482
2483 /* Decrement our nested depth level */
2484 team->t.t_level--;
2485 team->t.t_active_level--;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002486 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002487
2488 /* Restore number of threads in the team if needed */
2489 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2490 int old_num = master_th->th.th_team_nproc;
2491 int new_num = master_th->th.th_teams_size.nth;
2492 kmp_info_t **other_threads = team->t.t_threads;
2493 team->t.t_nproc = new_num;
2494 for (i = 0; i < old_num; ++i) {
2495 other_threads[i]->th.th_team_nproc = new_num;
2496 }
2497 // Adjust states of non-used threads of the team
2498 for (i = old_num; i < new_num; ++i) {
2499 // Re-initialize thread's barrier data.
2500 int b;
2501 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2502 for (b = 0; b < bs_last_barrier; ++b) {
2503 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2504 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2505#if USE_DEBUGGER
2506 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2507#endif
2508 }
2509 if (__kmp_tasking_mode != tskm_immediate_exec) {
2510 // Synchronize thread's task state
2511 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2512 }
2513 }
2514 }
2515
2516#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002517 if (ompt_enabled.enabled) {
2518 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2519 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002520 }
2521#endif
2522
2523 return;
2524 }
2525#endif /* OMP_40_ENABLED */
2526
2527 /* do cleanup and restore the parent team */
2528 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2529 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2530
2531 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2532
2533 /* jc: The following lock has instructions with REL and ACQ semantics,
2534 separating the parallel user code called in this parallel region
2535 from the serial user code called after this function returns. */
2536 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2537
2538#if OMP_40_ENABLED
2539 if (!master_th->th.th_teams_microtask ||
2540 team->t.t_level > master_th->th.th_teams_level)
2541#endif /* OMP_40_ENABLED */
2542 {
2543 /* Decrement our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002544 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002545 }
2546 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2547
Joachim Protze82e94a52017-11-01 10:08:30 +00002548#if OMPT_SUPPORT
2549 if (ompt_enabled.enabled) {
2550 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2551 if (ompt_enabled.ompt_callback_implicit_task) {
2552 int ompt_team_size = team->t.t_nproc;
2553 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2554 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
Joachim Protze2b46d302019-01-15 15:36:53 +00002555 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00002556 }
Joachim Protze82e94a52017-11-01 10:08:30 +00002557
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002558 task_info->frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00002559 task_info->task_data = ompt_data_none;
Jonathan Peyton30419822017-05-12 18:01:32 +00002560 }
2561#endif
2562
2563 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2564 master_th, team));
2565 __kmp_pop_current_task_from_thread(master_th);
2566
2567#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2568 // Restore master thread's partition.
2569 master_th->th.th_first_place = team->t.t_first_place;
2570 master_th->th.th_last_place = team->t.t_last_place;
2571#endif /* OMP_40_ENABLED */
Jonathan Peyton92ca6182018-09-07 18:25:49 +00002572#if OMP_50_ENABLED
2573 master_th->th.th_def_allocator = team->t.t_def_allocator;
2574#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002575
2576 updateHWFPControl(team);
2577
2578 if (root->r.r_active != master_active)
2579 root->r.r_active = master_active;
2580
2581 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2582 master_th)); // this will free worker threads
2583
2584  /* This race was fun to find. Keep the following assignments inside the
2585     critical region: otherwise debug assertions may fail occasionally, because
2586     the old team may be reallocated and the hierarchy then appears inconsistent.
2587     The race itself is benign, but it triggers those assertion failures; it is
2588     only one dereference and assignment, so keep it in the critical region. */
2589 master_th->th.th_team = parent_team;
2590 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2591 master_th->th.th_team_master = parent_team->t.t_threads[0];
2592 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2593
2594 /* restore serialized team, if need be */
2595 if (parent_team->t.t_serialized &&
2596 parent_team != master_th->th.th_serial_team &&
2597 parent_team != root->r.r_root_team) {
2598 __kmp_free_team(root,
2599 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2600 master_th->th.th_serial_team = parent_team;
2601 }
2602
2603 if (__kmp_tasking_mode != tskm_immediate_exec) {
2604 if (master_th->th.th_task_state_top >
2605 0) { // Restore task state from memo stack
2606 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2607 // Remember master's state if we re-use this nested hot team
2608 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2609 master_th->th.th_task_state;
2610 --master_th->th.th_task_state_top; // pop
2611 // Now restore state at this level
2612 master_th->th.th_task_state =
2613 master_th->th
2614 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2615 }
2616 // Copy the task team from the parent team to the master thread
2617 master_th->th.th_task_team =
2618 parent_team->t.t_task_team[master_th->th.th_task_state];
2619 KA_TRACE(20,
2620 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2621 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2622 parent_team));
2623 }
2624
2625 // TODO: GEH - cannot do this assertion because root thread not set up as
2626 // executing
2627 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2628 master_th->th.th_current_task->td_flags.executing = 1;
2629
2630 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2631
2632#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002633 if (ompt_enabled.enabled) {
2634 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2635 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002636 }
2637#endif
2638
2639 KMP_MB();
2640 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2641}
Jim Cownie5e8470a2013-09-27 10:38:44 +00002642
2643/* Check whether we should push an internal control record onto the
2644 serial team stack. If so, do it. */
Jonathan Peyton30419822017-05-12 18:01:32 +00002645void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002646
Jonathan Peyton30419822017-05-12 18:01:32 +00002647 if (thread->th.th_team != thread->th.th_serial_team) {
2648 return;
2649 }
2650 if (thread->th.th_team->t.t_serialized > 1) {
2651 int push = 0;
2652
2653 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2654 push = 1;
2655 } else {
2656 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2657 thread->th.th_team->t.t_serialized) {
2658 push = 1;
2659 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002660 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002661 if (push) { /* push a record on the serial team's stack */
2662 kmp_internal_control_t *control =
2663 (kmp_internal_control_t *)__kmp_allocate(
2664 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002665
Jonathan Peyton30419822017-05-12 18:01:32 +00002666 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667
Jonathan Peyton30419822017-05-12 18:01:32 +00002668 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002669
Jonathan Peyton30419822017-05-12 18:01:32 +00002670 control->next = thread->th.th_team->t.t_control_stack_top;
2671 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002672 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002673 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002674}
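// Note: the __kmp_set_* entry points below call __kmp_save_internal_controls()
// before modifying a thread's ICVs, so that values changed inside a serialized
// (nested) parallel region can be restored when that region ends.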
2675
2676/* Changes set_nproc */
Jonathan Peyton30419822017-05-12 18:01:32 +00002677void __kmp_set_num_threads(int new_nth, int gtid) {
2678 kmp_info_t *thread;
2679 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680
Jonathan Peyton30419822017-05-12 18:01:32 +00002681 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2682 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002683
Jonathan Peyton30419822017-05-12 18:01:32 +00002684 if (new_nth < 1)
2685 new_nth = 1;
2686 else if (new_nth > __kmp_max_nth)
2687 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002688
Jonathan Peyton30419822017-05-12 18:01:32 +00002689 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2690 thread = __kmp_threads[gtid];
Andrey Churbanov82318c62018-11-14 13:49:41 +00002691 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2692 return; // nothing to do
Jim Cownie5e8470a2013-09-27 10:38:44 +00002693
Jonathan Peyton30419822017-05-12 18:01:32 +00002694 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002695
Jonathan Peyton30419822017-05-12 18:01:32 +00002696 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002697
Jonathan Peyton30419822017-05-12 18:01:32 +00002698 // If this omp_set_num_threads() call will cause the hot team size to be
2699 // reduced (in the absence of a num_threads clause), then reduce it now,
2700 // rather than waiting for the next parallel region.
2701 root = thread->th.th_root;
2702 if (__kmp_init_parallel && (!root->r.r_active) &&
2703 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002704#if KMP_NESTED_HOT_TEAMS
2705 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2706#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002707 ) {
2708 kmp_team_t *hot_team = root->r.r_hot_team;
2709 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002710
Jonathan Peyton30419822017-05-12 18:01:32 +00002711 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002712
Jonathan Peyton30419822017-05-12 18:01:32 +00002713 // Release the extra threads we don't need any more.
2714 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2715 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2716 if (__kmp_tasking_mode != tskm_immediate_exec) {
2717 // When decreasing team size, threads no longer in the team should unref
2718 // task team.
2719 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2720 }
2721 __kmp_free_thread(hot_team->t.t_threads[f]);
2722 hot_team->t.t_threads[f] = NULL;
2723 }
2724 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002725#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002726 if (thread->th.th_hot_teams) {
2727 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2728 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2729 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002730#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731
Jonathan Peyton30419822017-05-12 18:01:32 +00002732 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002733
Jonathan Peyton30419822017-05-12 18:01:32 +00002734 // Update the t_nproc field in the threads that are still active.
2735 for (f = 0; f < new_nth; f++) {
2736 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2737 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002738 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002739 // Special flag in case omp_set_num_threads() call
2740 hot_team->t.t_size_changed = -1;
2741 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002742}
2743
Jim Cownie5e8470a2013-09-27 10:38:44 +00002744/* Changes max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002745void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2746 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002747
Jonathan Peyton30419822017-05-12 18:01:32 +00002748 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2749 "%d = (%d)\n",
2750 gtid, max_active_levels));
2751 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002752
Jonathan Peyton30419822017-05-12 18:01:32 +00002753 // validate max_active_levels
2754 if (max_active_levels < 0) {
2755 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2756 // We ignore this call if the user has specified a negative value.
2757 // The current setting won't be changed. The last valid setting will be
2758 // used. A warning will be issued (if warnings are allowed as controlled by
2759 // the KMP_WARNINGS env var).
2760 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2761 "max_active_levels for thread %d = (%d)\n",
2762 gtid, max_active_levels));
2763 return;
2764 }
2765 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2766 // it's OK, the max_active_levels is within the valid range: [ 0;
2767 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2768 // We allow a zero value. (implementation defined behavior)
2769 } else {
2770 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2771 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2772 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2773 // Current upper limit is MAX_INT. (implementation defined behavior)
2774 // If the input exceeds the upper limit, we correct the input to be the
2775 // upper limit. (implementation defined behavior)
2776    // In practice this branch is unreachable as long as the limit is MAX_INT.
2777 }
2778 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2779 "max_active_levels for thread %d = (%d)\n",
2780 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002781
Jonathan Peyton30419822017-05-12 18:01:32 +00002782 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002783
Jonathan Peyton30419822017-05-12 18:01:32 +00002784 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002785
Jonathan Peyton30419822017-05-12 18:01:32 +00002786 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002787}
2788
2789/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002790int __kmp_get_max_active_levels(int gtid) {
2791 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002792
Jonathan Peyton30419822017-05-12 18:01:32 +00002793 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2794 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002795
Jonathan Peyton30419822017-05-12 18:01:32 +00002796 thread = __kmp_threads[gtid];
2797 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2798 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2799 "curtask_maxaclevel=%d\n",
2800 gtid, thread->th.th_current_task,
2801 thread->th.th_current_task->td_icvs.max_active_levels));
2802 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002803}
2804
2805/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
Jonathan Peyton30419822017-05-12 18:01:32 +00002806void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2807 kmp_info_t *thread;
2808 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002809
Jonathan Peyton30419822017-05-12 18:01:32 +00002810 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2811 gtid, (int)kind, chunk));
2812 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002813
Jonathan Peyton30419822017-05-12 18:01:32 +00002814 // Check if the kind parameter is valid, correct if needed.
2815 // Valid parameters should fit in one of two intervals - standard or extended:
2816 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2817 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2818 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2819 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2820 // TODO: Hint needs attention in case we change the default schedule.
2821 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2822 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2823 __kmp_msg_null);
2824 kind = kmp_sched_default;
2825 chunk = 0; // ignore chunk value in case of bad kind
2826 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002827
Jonathan Peyton30419822017-05-12 18:01:32 +00002828 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002829
Jonathan Peyton30419822017-05-12 18:01:32 +00002830 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002831
Jonathan Peyton30419822017-05-12 18:01:32 +00002832 if (kind < kmp_sched_upper_std) {
2833 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2834      // differentiate static chunked vs. unchunked: chunk should be invalid to
2835      // indicate the unchunked schedule (which is the default)
2836 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002837 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002838 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2839 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002840 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002841 } else {
2842 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2843 // kmp_sched_lower - 2 ];
2844 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2845 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2846 kmp_sched_lower - 2];
2847 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002848 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002849 // ignore parameter chunk for schedule auto
2850 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2851 } else {
2852 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2853 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002854}
2855
2856/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002857void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2858 kmp_info_t *thread;
2859 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002860
Jonathan Peyton30419822017-05-12 18:01:32 +00002861 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2862 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002863
Jonathan Peyton30419822017-05-12 18:01:32 +00002864 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002865
Jonathan Peyton30419822017-05-12 18:01:32 +00002866 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002867
Jonathan Peyton30419822017-05-12 18:01:32 +00002868 switch (th_type) {
2869 case kmp_sch_static:
2870 case kmp_sch_static_greedy:
2871 case kmp_sch_static_balanced:
2872 *kind = kmp_sched_static;
2873    *chunk = 0; // chunk was not set; report zero to indicate this
2874 return;
2875 case kmp_sch_static_chunked:
2876 *kind = kmp_sched_static;
2877 break;
2878 case kmp_sch_dynamic_chunked:
2879 *kind = kmp_sched_dynamic;
2880 break;
2881 case kmp_sch_guided_chunked:
2882 case kmp_sch_guided_iterative_chunked:
2883 case kmp_sch_guided_analytical_chunked:
2884 *kind = kmp_sched_guided;
2885 break;
2886 case kmp_sch_auto:
2887 *kind = kmp_sched_auto;
2888 break;
2889 case kmp_sch_trapezoidal:
2890 *kind = kmp_sched_trapezoidal;
2891 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002892#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002893 case kmp_sch_static_steal:
2894 *kind = kmp_sched_static_steal;
2895 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002896#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002897 default:
2898 KMP_FATAL(UnknownSchedulingType, th_type);
2899 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002900
Jonathan Peyton30419822017-05-12 18:01:32 +00002901 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002902}
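// Usage sketch (assuming the default __kmp_sch_map layout): a call such as
// __kmp_set_schedule(gtid, kmp_sched_dynamic, 4) stores kmp_sch_dynamic_chunked
// with chunk 4 in the calling thread's ICVs, and a later
// __kmp_get_schedule(gtid, &kind, &chunk) reports it back as
// (kmp_sched_dynamic, 4).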
2903
Jonathan Peyton30419822017-05-12 18:01:32 +00002904int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002905
Jonathan Peyton30419822017-05-12 18:01:32 +00002906 int ii, dd;
2907 kmp_team_t *team;
2908 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002909
Jonathan Peyton30419822017-05-12 18:01:32 +00002910 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2911 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002912
Jonathan Peyton30419822017-05-12 18:01:32 +00002913 // validate level
2914 if (level == 0)
2915 return 0;
2916 if (level < 0)
2917 return -1;
2918 thr = __kmp_threads[gtid];
2919 team = thr->th.th_team;
2920 ii = team->t.t_level;
2921 if (level > ii)
2922 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002923
2924#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002925 if (thr->th.th_teams_microtask) {
2926 // AC: we are in teams region where multiple nested teams have same level
2927 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2928 if (level <=
2929 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2930 KMP_DEBUG_ASSERT(ii >= tlevel);
2931 // AC: As we need to pass by the teams league, we need to artificially
2932 // increase ii
2933 if (ii == tlevel) {
2934 ii += 2; // three teams have same level
2935 } else {
2936 ii++; // two teams have same level
2937 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002938 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002939 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002940#endif
2941
Jonathan Peyton30419822017-05-12 18:01:32 +00002942 if (ii == level)
2943 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002944
Jonathan Peyton30419822017-05-12 18:01:32 +00002945 dd = team->t.t_serialized;
2946 level++;
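  // Walk up the team hierarchy toward the requested level: serialized nesting
  // levels are consumed via dd before moving to the parent team, while ii
  // tracks the level of the team currently being examined.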
2947 while (ii > level) {
2948 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002949 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002950 if ((team->t.t_serialized) && (!dd)) {
2951 team = team->t.t_parent;
2952 continue;
2953 }
2954 if (ii > level) {
2955 team = team->t.t_parent;
2956 dd = team->t.t_serialized;
2957 ii--;
2958 }
2959 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002960
Jonathan Peyton30419822017-05-12 18:01:32 +00002961 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002962}
2963
Jonathan Peyton30419822017-05-12 18:01:32 +00002964int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002965
Jonathan Peyton30419822017-05-12 18:01:32 +00002966 int ii, dd;
2967 kmp_team_t *team;
2968 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002969
Jonathan Peyton30419822017-05-12 18:01:32 +00002970 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2971 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002972
Jonathan Peyton30419822017-05-12 18:01:32 +00002973 // validate level
2974 if (level == 0)
2975 return 1;
2976 if (level < 0)
2977 return -1;
2978 thr = __kmp_threads[gtid];
2979 team = thr->th.th_team;
2980 ii = team->t.t_level;
2981 if (level > ii)
2982 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002983
2984#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002985 if (thr->th.th_teams_microtask) {
2986 // AC: we are in teams region where multiple nested teams have same level
2987 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2988 if (level <=
2989 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2990 KMP_DEBUG_ASSERT(ii >= tlevel);
2991 // AC: As we need to pass by the teams league, we need to artificially
2992 // increase ii
2993 if (ii == tlevel) {
2994 ii += 2; // three teams have same level
2995 } else {
2996 ii++; // two teams have same level
2997 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002998 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002999 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003000#endif
3001
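  // Walk up the team hierarchy (consuming serialized nesting levels) until the
  // team at the requested level is reached; its t_nproc is the team size.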
Jonathan Peyton30419822017-05-12 18:01:32 +00003002 while (ii > level) {
3003 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003004 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003005 if (team->t.t_serialized && (!dd)) {
3006 team = team->t.t_parent;
3007 continue;
3008 }
3009 if (ii > level) {
3010 team = team->t.t_parent;
3011 ii--;
3012 }
3013 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003014
Jonathan Peyton30419822017-05-12 18:01:32 +00003015 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016}
3017
Jonathan Peyton30419822017-05-12 18:01:32 +00003018kmp_r_sched_t __kmp_get_schedule_global() {
3019  // This routine exists because the pairs (__kmp_sched, __kmp_chunk) and
3020  // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3021  // independently, so the up-to-date schedule has to be assembled here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003022
Jonathan Peyton30419822017-05-12 18:01:32 +00003023 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003024
Jonathan Peyton30419822017-05-12 18:01:32 +00003025 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3026 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3027 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3028 // different roots (even in OMP 2.5)
3029 if (__kmp_sched == kmp_sch_static) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003030 // replace STATIC with more detailed schedule (balanced or greedy)
3031 r_sched.r_sched_type = __kmp_static;
Jonathan Peyton30419822017-05-12 18:01:32 +00003032 } else if (__kmp_sched == kmp_sch_guided_chunked) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003033 // replace GUIDED with more detailed schedule (iterative or analytical)
3034 r_sched.r_sched_type = __kmp_guided;
3035 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3036 r_sched.r_sched_type = __kmp_sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003037 }
3038
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003039 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3040 // __kmp_chunk may be wrong here (if it was not ever set)
Jonathan Peyton30419822017-05-12 18:01:32 +00003041 r_sched.chunk = KMP_DEFAULT_CHUNK;
3042 } else {
3043 r_sched.chunk = __kmp_chunk;
3044 }
3045
3046 return r_sched;
3047}
3048
3049/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
3050   at least argc *t_argv entries for the requested team. */
3051static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3052
3053 KMP_DEBUG_ASSERT(team);
3054 if (!realloc || argc > team->t.t_max_argc) {
3055
3056 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3057 "current entries=%d\n",
3058 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3059 /* if previously allocated heap space for args, free them */
3060 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3061 __kmp_free((void *)team->t.t_argv);
3062
3063 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3064 /* use unused space in the cache line for arguments */
3065 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3066 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3067 "argv entries\n",
3068 team->t.t_id, team->t.t_max_argc));
3069 team->t.t_argv = &team->t.t_inline_argv[0];
3070 if (__kmp_storage_map) {
3071 __kmp_print_storage_map_gtid(
3072 -1, &team->t.t_inline_argv[0],
3073 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3074 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3075 team->t.t_id);
3076 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003077 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003078 /* allocate space for arguments in the heap */
3079 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3080 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3081 : 2 * argc;
3082 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3083 "argv entries\n",
3084 team->t.t_id, team->t.t_max_argc));
3085 team->t.t_argv =
3086 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3087 if (__kmp_storage_map) {
3088 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3089 &team->t.t_argv[team->t.t_max_argc],
3090 sizeof(void *) * team->t.t_max_argc,
3091 "team_%d.t_argv", team->t.t_id);
3092 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003093 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003094 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003095}
3096
Jonathan Peyton30419822017-05-12 18:01:32 +00003097static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3098 int i;
3099 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3100 team->t.t_threads =
3101 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3102 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3103 sizeof(dispatch_shared_info_t) * num_disp_buff);
3104 team->t.t_dispatch =
3105 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3106 team->t.t_implicit_task_taskdata =
3107 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3108 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003109
Jonathan Peyton30419822017-05-12 18:01:32 +00003110 /* setup dispatch buffers */
3111 for (i = 0; i < num_disp_buff; ++i) {
3112 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003113#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003114 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003115#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003116 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003117}
3118
Jonathan Peyton30419822017-05-12 18:01:32 +00003119static void __kmp_free_team_arrays(kmp_team_t *team) {
3120 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3121 int i;
3122 for (i = 0; i < team->t.t_max_nproc; ++i) {
3123 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3124 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3125 team->t.t_dispatch[i].th_disp_buffer = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003126 }
3127 }
Jonathan Peytonf6399362018-07-09 17:51:13 +00003128#if KMP_USE_HIER_SCHED
3129 __kmp_dispatch_free_hierarchies(team);
3130#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003131 __kmp_free(team->t.t_threads);
3132 __kmp_free(team->t.t_disp_buffer);
3133 __kmp_free(team->t.t_dispatch);
3134 __kmp_free(team->t.t_implicit_task_taskdata);
3135 team->t.t_threads = NULL;
3136 team->t.t_disp_buffer = NULL;
3137 team->t.t_dispatch = NULL;
3138 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003139}
3140
Jonathan Peyton30419822017-05-12 18:01:32 +00003141static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
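  // Note: the existing kmp_info_t * entries in t_threads are copied into the
  // newly allocated arrays below; the dispatch buffers and implicit task data
  // are not preserved and are allocated fresh by __kmp_allocate_team_arrays().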
3142 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143
Jonathan Peyton30419822017-05-12 18:01:32 +00003144 __kmp_free(team->t.t_disp_buffer);
3145 __kmp_free(team->t.t_dispatch);
3146 __kmp_free(team->t.t_implicit_task_taskdata);
3147 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003148
Jonathan Peyton30419822017-05-12 18:01:32 +00003149 KMP_MEMCPY(team->t.t_threads, oldThreads,
3150 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003151
Jonathan Peyton30419822017-05-12 18:01:32 +00003152 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003153}
3154
Jonathan Peyton30419822017-05-12 18:01:32 +00003155static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003156
Jonathan Peyton30419822017-05-12 18:01:32 +00003157 kmp_r_sched_t r_sched =
3158 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003159
3160#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003161 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003162#endif /* OMP_40_ENABLED */
3163
Jonathan Peyton30419822017-05-12 18:01:32 +00003164 kmp_internal_control_t g_icvs = {
3165 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3166 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
3167 // for nested parallelism (per thread)
3168 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3169 // adjustment of threads (per thread)
3170 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3171 // whether blocktime is explicitly set
3172 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003173#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003174 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3175// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003176#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003177 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3178 // next parallel region (per thread)
3179 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3180 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3181 // for max_active_levels
3182 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3183// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003184#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003185 __kmp_nested_proc_bind.bind_types[0],
3186 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003187#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003188 NULL // struct kmp_internal_control *next;
3189 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003190
Jonathan Peyton30419822017-05-12 18:01:32 +00003191 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003192}
3193
Jonathan Peyton30419822017-05-12 18:01:32 +00003194static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003195
Jonathan Peyton30419822017-05-12 18:01:32 +00003196 kmp_internal_control_t gx_icvs;
3197 gx_icvs.serial_nesting_level =
3198      0; // probably = team->t.t_serialized, as in __kmp_save_internal_controls
3199 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3200 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003201
Jonathan Peyton30419822017-05-12 18:01:32 +00003202 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003203}
3204
Jonathan Peyton30419822017-05-12 18:01:32 +00003205static void __kmp_initialize_root(kmp_root_t *root) {
3206 int f;
3207 kmp_team_t *root_team;
3208 kmp_team_t *hot_team;
3209 int hot_team_max_nth;
3210 kmp_r_sched_t r_sched =
3211 __kmp_get_schedule_global(); // get current state of scheduling globals
3212 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3213 KMP_DEBUG_ASSERT(root);
3214 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003215
Jonathan Peyton30419822017-05-12 18:01:32 +00003216 /* setup the root state structure */
3217 __kmp_init_lock(&root->r.r_begin_lock);
3218 root->r.r_begin = FALSE;
3219 root->r.r_active = FALSE;
3220 root->r.r_in_parallel = 0;
3221 root->r.r_blocktime = __kmp_dflt_blocktime;
3222 root->r.r_nested = __kmp_dflt_nested;
Jonathan Peytonf4392462017-07-27 20:58:41 +00003223 root->r.r_cg_nthreads = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003224
Jonathan Peyton30419822017-05-12 18:01:32 +00003225 /* setup the root team for this task */
3226 /* allocate the root team structure */
3227 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003228
Jonathan Peyton30419822017-05-12 18:01:32 +00003229 root_team =
3230 __kmp_allocate_team(root,
3231 1, // new_nproc
3232 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003233#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003234 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003235#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003236#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003237 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003238#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003239 &r_icvs,
3240 0 // argc
3241 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3242 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003243#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003244 // Non-NULL value should be assigned to make the debugger display the root
3245 // team.
3246 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003247#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003248
Jonathan Peyton30419822017-05-12 18:01:32 +00003249 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003250
Jonathan Peyton30419822017-05-12 18:01:32 +00003251 root->r.r_root_team = root_team;
3252 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003253
Jonathan Peyton30419822017-05-12 18:01:32 +00003254 /* initialize root team */
3255 root_team->t.t_threads[0] = NULL;
3256 root_team->t.t_nproc = 1;
3257 root_team->t.t_serialized = 1;
3258 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003259 root_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003260 KA_TRACE(
3261 20,
3262 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3263 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003264
Jonathan Peyton30419822017-05-12 18:01:32 +00003265 /* setup the hot team for this task */
3266 /* allocate the hot team structure */
3267 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003268
Jonathan Peyton30419822017-05-12 18:01:32 +00003269 hot_team =
3270 __kmp_allocate_team(root,
3271 1, // new_nproc
3272 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003273#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003274 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003275#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003277 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003278#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003279 &r_icvs,
3280 0 // argc
3281 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3282 );
3283 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003284
Jonathan Peyton30419822017-05-12 18:01:32 +00003285 root->r.r_hot_team = hot_team;
3286 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003287
Jonathan Peyton30419822017-05-12 18:01:32 +00003288 /* first-time initialization */
3289 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003290
Jonathan Peyton30419822017-05-12 18:01:32 +00003291 /* initialize hot team */
3292 hot_team_max_nth = hot_team->t.t_max_nproc;
3293 for (f = 0; f < hot_team_max_nth; ++f) {
3294 hot_team->t.t_threads[f] = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003295 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003296 hot_team->t.t_nproc = 1;
3297 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003298 hot_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003299 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003300}
3301
3302#ifdef KMP_DEBUG
3303
Jim Cownie5e8470a2013-09-27 10:38:44 +00003304typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003305 kmp_team_p const *entry;
3306 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003307} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003308typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003309
Jonathan Peyton30419822017-05-12 18:01:32 +00003310static void __kmp_print_structure_team_accum( // Add team to list of teams.
3311 kmp_team_list_t list, // List of teams.
3312 kmp_team_p const *team // Team to add.
3313 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003314
Jonathan Peyton30419822017-05-12 18:01:32 +00003315 // List must terminate with item where both entry and next are NULL.
3316 // Team is added to the list only once.
3317 // List is sorted in ascending order by team id.
3318 // Team id is *not* a key.
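  // For example: accumulating teams with ids 2 and 0 (in that order) yields
  // {id 0} -> {id 2} -> sentinel {entry = NULL, next = NULL}; adding the same
  // team pointer again is a no-op.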
Jim Cownie5e8470a2013-09-27 10:38:44 +00003319
Jonathan Peyton30419822017-05-12 18:01:32 +00003320 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003321
Jonathan Peyton30419822017-05-12 18:01:32 +00003322 KMP_DEBUG_ASSERT(list != NULL);
3323 if (team == NULL) {
3324 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003325 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003326
Jonathan Peyton30419822017-05-12 18:01:32 +00003327 __kmp_print_structure_team_accum(list, team->t.t_parent);
3328 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003329
Jonathan Peyton30419822017-05-12 18:01:32 +00003330 // Search list for the team.
3331 l = list;
3332 while (l->next != NULL && l->entry != team) {
3333 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003334 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003335 if (l->next != NULL) {
3336 return; // Team has been added before, exit.
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003337 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003338
Jonathan Peyton30419822017-05-12 18:01:32 +00003339 // Team is not found. Search list again for insertion point.
3340 l = list;
3341 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3342 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003343 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003344
Jonathan Peyton30419822017-05-12 18:01:32 +00003345 // Insert team.
3346 {
3347 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3348 sizeof(kmp_team_list_item_t));
3349 *item = *l;
3350 l->entry = team;
3351 l->next = item;
3352 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003353}
3354
Jonathan Peyton30419822017-05-12 18:01:32 +00003355 static void __kmp_print_structure_team(char const *title,
 3356 kmp_team_p const *team) {
3358 __kmp_printf("%s", title);
3359 if (team != NULL) {
3360 __kmp_printf("%2x %p\n", team->t.t_id, team);
3361 } else {
3362 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003363 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003364}
3365
Jonathan Peyton30419822017-05-12 18:01:32 +00003366static void __kmp_print_structure_thread(char const *title,
3367 kmp_info_p const *thread) {
3368 __kmp_printf("%s", title);
3369 if (thread != NULL) {
3370 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3371 } else {
3372 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003373 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003374}
3375
Jonathan Peyton30419822017-05-12 18:01:32 +00003376void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003377
Jonathan Peyton30419822017-05-12 18:01:32 +00003378 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003379
Jonathan Peyton30419822017-05-12 18:01:32 +00003380 // Initialize list of teams.
3381 list =
3382 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3383 list->entry = NULL;
3384 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003385
Jonathan Peyton30419822017-05-12 18:01:32 +00003386 __kmp_printf("\n------------------------------\nGlobal Thread "
3387 "Table\n------------------------------\n");
3388 {
3389 int gtid;
3390 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3391 __kmp_printf("%2d", gtid);
3392 if (__kmp_threads != NULL) {
3393 __kmp_printf(" %p", __kmp_threads[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003394 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003395 if (__kmp_root != NULL) {
3396 __kmp_printf(" %p", __kmp_root[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003397 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003398 __kmp_printf("\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003399 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003400 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003401
Jonathan Peyton30419822017-05-12 18:01:32 +00003402 // Print out __kmp_threads array.
3403 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3404 "----------\n");
3405 if (__kmp_threads != NULL) {
3406 int gtid;
3407 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3408 kmp_info_t const *thread = __kmp_threads[gtid];
3409 if (thread != NULL) {
3410 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3411 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3412 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3413 __kmp_print_structure_team(" Serial Team: ",
3414 thread->th.th_serial_team);
3415 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3416 __kmp_print_structure_thread(" Master: ",
3417 thread->th.th_team_master);
3418 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3419 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003420#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003421 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003422#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003423 __kmp_print_structure_thread(" Next in pool: ",
3424 thread->th.th_next_pool);
3425 __kmp_printf("\n");
3426 __kmp_print_structure_team_accum(list, thread->th.th_team);
3427 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003428 }
3429 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003430 } else {
3431 __kmp_printf("Threads array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003432 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003433
Jonathan Peyton30419822017-05-12 18:01:32 +00003434 // Print out __kmp_root array.
3435 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3436 "--------\n");
3437 if (__kmp_root != NULL) {
3438 int gtid;
3439 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3440 kmp_root_t const *root = __kmp_root[gtid];
3441 if (root != NULL) {
3442 __kmp_printf("GTID %2d %p:\n", gtid, root);
3443 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3444 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3445 __kmp_print_structure_thread(" Uber Thread: ",
3446 root->r.r_uber_thread);
3447 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3448 __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
Jonathan Peyton61d44f12018-07-09 18:09:25 +00003449 __kmp_printf(" In Parallel: %2d\n",
3450 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
Jonathan Peyton30419822017-05-12 18:01:32 +00003451 __kmp_printf("\n");
3452 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3453 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003454 }
3455 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003456 } else {
3457 __kmp_printf("Ubers array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003458 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003459
Jonathan Peyton30419822017-05-12 18:01:32 +00003460 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3461 "--------\n");
3462 while (list->next != NULL) {
3463 kmp_team_p const *team = list->entry;
3464 int i;
3465 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3466 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3467 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3468 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3469 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3470 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3471 for (i = 0; i < team->t.t_nproc; ++i) {
3472 __kmp_printf(" Thread %2d: ", i);
3473 __kmp_print_structure_thread("", team->t.t_threads[i]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003474 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003475 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3476 __kmp_printf("\n");
3477 list = list->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003478 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479
Jonathan Peyton30419822017-05-12 18:01:32 +00003480 // Print out __kmp_thread_pool and __kmp_team_pool.
3481 __kmp_printf("\n------------------------------\nPools\n----------------------"
3482 "--------\n");
3483 __kmp_print_structure_thread("Thread pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003484 CCAST(kmp_info_t *, __kmp_thread_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003485 __kmp_print_structure_team("Team pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003486 CCAST(kmp_team_t *, __kmp_team_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003487 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003488
Jonathan Peyton30419822017-05-12 18:01:32 +00003489 // Free team list.
3490 while (list != NULL) {
3491 kmp_team_list_item_t *item = list;
3492 list = list->next;
3493 KMP_INTERNAL_FREE(item);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003494 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003495}
3496
3497#endif
3498
Jim Cownie5e8470a2013-09-27 10:38:44 +00003499//---------------------------------------------------------------------------
3500// Stuff for per-thread fast random number generator
3501// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003502static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003503 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3504 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3505 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3506 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3507 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3508 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3509 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3510 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3511 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3512 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3513 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003514
3515//---------------------------------------------------------------------------
3516// __kmp_get_random: Get a random number using a linear congruential method.
Jonathan Peyton30419822017-05-12 18:01:32 +00003517unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003518 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003519 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003520
Jonathan Peyton30419822017-05-12 18:01:32 +00003521 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003522
3523 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003524 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003525
3526 return r;
3527}
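// Note: the function above is a 32-bit linear congruential generator,
//   x_{n+1} = a * x_n + 1 (mod 2^32),  return value = x_n >> 16,
// where the per-thread multiplier 'a' is taken from __kmp_primes by
// __kmp_init_random() below. Returning only the high 16 bits sidesteps the
// short-period low-order bits of a power-of-two-modulus LCG.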
3528//--------------------------------------------------------
3529// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003530void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003531 unsigned seed = thread->th.th_info.ds.ds_tid;
3532
Jonathan Peyton30419822017-05-12 18:01:32 +00003533 thread->th.th_a =
3534 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3535 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3536 KA_TRACE(30,
3537 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003538}
3539
Jim Cownie5e8470a2013-09-27 10:38:44 +00003540#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003541/* reclaim array entries for root threads that are already dead, returns number
3542 * reclaimed */
3543static int __kmp_reclaim_dead_roots(void) {
3544 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003545
Jonathan Peyton30419822017-05-12 18:01:32 +00003546 for (i = 0; i < __kmp_threads_capacity; ++i) {
3547 if (KMP_UBER_GTID(i) &&
3548 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3549 !__kmp_root[i]
3550 ->r.r_active) { // AC: reclaim only roots died in non-active state
3551 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003553 }
3554 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003555}
3556#endif
3557
Jonathan Peyton30419822017-05-12 18:01:32 +00003558/* This function attempts to create free entries in __kmp_threads and
3559 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003560
Jonathan Peyton30419822017-05-12 18:01:32 +00003561 For Windows* OS static library, the first mechanism used is to reclaim array
3562 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003563
Jonathan Peyton30419822017-05-12 18:01:32 +00003564 On all platforms, expansion is attempted on the arrays __kmp_threads and
3565 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3566 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3567 threadprivate cache array has been created. Synchronization with
3568 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003569
Jonathan Peyton30419822017-05-12 18:01:32 +00003570 After any dead root reclamation, if the clipping value allows array expansion
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003571 to result in the generation of a total of nNeed free slots, the function does
3572 that expansion. If not, nothing is done beyond the possible initial root
3573 thread reclamation.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003574
Jonathan Peyton30419822017-05-12 18:01:32 +00003575 If any argument is negative, the behavior is undefined. */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003576static int __kmp_expand_threads(int nNeed) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003577 int added = 0;
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003578 int minimumRequiredCapacity;
3579 int newCapacity;
3580 kmp_info_t **newThreads;
3581 kmp_root_t **newRoot;
3582
3583// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3584// resizing __kmp_threads does not need additional protection if foreign
3585// threads are present
Jim Cownie5e8470a2013-09-27 10:38:44 +00003586
Jonathan Peyton8b3842f2018-10-05 17:59:39 +00003587#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003588 /* only for Windows static library */
3589 /* reclaim array entries for root threads that are already dead */
3590 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003591
Jonathan Peyton30419822017-05-12 18:01:32 +00003592 if (nNeed) {
3593 nNeed -= added;
3594 if (nNeed < 0)
3595 nNeed = 0;
3596 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003597#endif
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003598 if (nNeed <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003599 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003600
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003601 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3602 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3603 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3604 // > __kmp_max_nth in one of two ways:
3605 //
3606 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3607 // may not be reused by another thread, so we may need to increase
3608 // __kmp_threads_capacity to __kmp_max_nth + 1.
3609 //
3610 // 2) New foreign root(s) are encountered. We always register new foreign
3611 // roots. This may cause a smaller # of threads to be allocated at
3612 // subsequent parallel regions, but the worker threads hang around (and
3613 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3614 //
3615 // Anyway, that is the reason for moving the check to see if
3616 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3617 // instead of having it performed here. -BB
Jonathan Peyton30419822017-05-12 18:01:32 +00003618
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003619 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
Jonathan Peyton30419822017-05-12 18:01:32 +00003620
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003621 /* compute expansion headroom to check if we can expand */
3622 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3623 /* possible expansion too small -- give up */
3624 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003625 }
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003626 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3627
3628 newCapacity = __kmp_threads_capacity;
3629 do {
3630 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3631 : __kmp_sys_max_nth;
3632 } while (newCapacity < minimumRequiredCapacity);
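  // Illustration (assumed values): with __kmp_threads_capacity == 32, nNeed == 5
  // and __kmp_sys_max_nth sufficiently large, minimumRequiredCapacity is 37 and
  // the loop doubles 32 -> 64, so newCapacity becomes 64; when doubling would
  // pass __kmp_sys_max_nth, newCapacity is clipped to __kmp_sys_max_nth instead.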
3633 newThreads = (kmp_info_t **)__kmp_allocate(
3634 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3635 newRoot =
3636 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3637 KMP_MEMCPY(newThreads, __kmp_threads,
3638 __kmp_threads_capacity * sizeof(kmp_info_t *));
3639 KMP_MEMCPY(newRoot, __kmp_root,
3640 __kmp_threads_capacity * sizeof(kmp_root_t *));
3641
3642 kmp_info_t **temp_threads = __kmp_threads;
3643 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3644 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3645 __kmp_free(temp_threads);
3646 added += newCapacity - __kmp_threads_capacity;
3647 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3648
3649 if (newCapacity > __kmp_tp_capacity) {
3650 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3651 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3652 __kmp_threadprivate_resize_cache(newCapacity);
3653 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3654 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3655 }
3656 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3657 }
3658
Jonathan Peyton30419822017-05-12 18:01:32 +00003659 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003660}
3661
Jonathan Peyton30419822017-05-12 18:01:32 +00003662/* Register the current thread as a root thread and obtain our gtid. We must
3663 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3664 thread that calls from __kmp_do_serial_initialize() */
3665int __kmp_register_root(int initial_thread) {
3666 kmp_info_t *root_thread;
3667 kmp_root_t *root;
3668 int gtid;
3669 int capacity;
3670 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3671 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3672 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003673
Jonathan Peyton30419822017-05-12 18:01:32 +00003674 /* 2007-03-02:
3675 If the initial thread did not invoke the OpenMP RTL yet, and this thread is
3676 not an initial one, the "__kmp_all_nth >= __kmp_threads_capacity" condition
3677 does not work as expected -- it may return false (meaning there is at least
3678 one empty slot in the __kmp_threads array), but it is possible that the only
3679 free slot is #0, which is reserved for the initial thread and so cannot be
3680 used for this one. The following code works around this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003681
Jonathan Peyton30419822017-05-12 18:01:32 +00003682 However, the right solution seems to be not reserving slot #0 for the
3683 initial thread because:
3684 (1) there is no magic in slot #0,
3685 (2) we cannot detect the initial thread reliably (the first thread which
3686 does serial initialization may not be a real initial thread).
3687 */
3688 capacity = __kmp_threads_capacity;
3689 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3690 --capacity;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003691 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003692
Jonathan Peyton30419822017-05-12 18:01:32 +00003693 /* see if there are too many threads */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003694 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003695 if (__kmp_tp_cached) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003696 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3697 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3698 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00003699 } else {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003700 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3701 __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003702 }
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003703 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003704
3705 /* find an available thread slot */
3706 /* Don't reassign the zero slot since we need that to only be used by initial
3707 thread */
3708 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3709 gtid++)
3710 ;
3711 KA_TRACE(1,
3712 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3713 KMP_ASSERT(gtid < __kmp_threads_capacity);
3714
3715 /* update global accounting */
3716 __kmp_all_nth++;
3717 TCW_4(__kmp_nth, __kmp_nth + 1);
3718
3719 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3720 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3721 if (__kmp_adjust_gtid_mode) {
3722 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3723 if (TCR_4(__kmp_gtid_mode) != 2) {
3724 TCW_4(__kmp_gtid_mode, 2);
3725 }
3726 } else {
3727 if (TCR_4(__kmp_gtid_mode) != 1) {
3728 TCW_4(__kmp_gtid_mode, 1);
3729 }
3730 }
3731 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003732
3733#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003734 /* Adjust blocktime to zero if necessary */
3735 /* Middle initialization might not have occurred yet */
3736 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3737 if (__kmp_nth > __kmp_avail_proc) {
3738 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003739 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003740 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741#endif /* KMP_ADJUST_BLOCKTIME */
3742
Jonathan Peyton30419822017-05-12 18:01:32 +00003743 /* setup this new hierarchy */
3744 if (!(root = __kmp_root[gtid])) {
3745 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3746 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3747 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003748
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003749#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003750 // Initialize stats as soon as possible (right after gtid assignment).
3751 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00003752 __kmp_stats_thread_ptr->startLife();
Jonathan Peyton30419822017-05-12 18:01:32 +00003753 KMP_SET_THREAD_STATE(SERIAL_REGION);
3754 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003755#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003756 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757
Jonathan Peyton30419822017-05-12 18:01:32 +00003758 /* setup new root thread structure */
3759 if (root->r.r_uber_thread) {
3760 root_thread = root->r.r_uber_thread;
3761 } else {
3762 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3763 if (__kmp_storage_map) {
3764 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003765 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003766 root_thread->th.th_info.ds.ds_gtid = gtid;
Joachim Protze82e94a52017-11-01 10:08:30 +00003767#if OMPT_SUPPORT
Jonathan Peyton3574f282018-10-04 14:57:04 +00003768 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00003769#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003770 root_thread->th.th_root = root;
3771 if (__kmp_env_consistency_check) {
3772 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3773 }
3774#if USE_FAST_MEMORY
3775 __kmp_initialize_fast_memory(root_thread);
3776#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003777
Jonathan Peyton30419822017-05-12 18:01:32 +00003778#if KMP_USE_BGET
3779 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3780 __kmp_initialize_bget(root_thread);
3781#endif
3782 __kmp_init_random(root_thread); // Initialize random number generator
3783 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003784
Jonathan Peyton30419822017-05-12 18:01:32 +00003785 /* setup the serial team held in reserve by the root thread */
3786 if (!root_thread->th.th_serial_team) {
3787 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3788 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3789 root_thread->th.th_serial_team =
3790 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003791#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003792 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003793#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003794#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003795 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003796#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003797 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3798 }
3799 KMP_ASSERT(root_thread->th.th_serial_team);
3800 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3801 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003802
Jonathan Peyton30419822017-05-12 18:01:32 +00003803 /* drop root_thread into place */
3804 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003805
Jonathan Peyton30419822017-05-12 18:01:32 +00003806 root->r.r_root_team->t.t_threads[0] = root_thread;
3807 root->r.r_hot_team->t.t_threads[0] = root_thread;
3808 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3809 // AC: the team created in reserve, not for execution (it is unused for now).
3810 root_thread->th.th_serial_team->t.t_serialized = 0;
3811 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003812
Jonathan Peyton30419822017-05-12 18:01:32 +00003813 /* initialize the thread, get it ready to go */
3814 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3815 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003816
Jonathan Peyton30419822017-05-12 18:01:32 +00003817 /* prepare the master thread for get_gtid() */
3818 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003819
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003820#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003821 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003822#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003823
Jonathan Peyton30419822017-05-12 18:01:32 +00003824#ifdef KMP_TDATA_GTID
3825 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003826#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003827 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3828 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3829
3830 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3831 "plain=%u\n",
3832 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3833 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3834 KMP_INIT_BARRIER_STATE));
3835 { // Initialize barrier data.
3836 int b;
3837 for (b = 0; b < bs_last_barrier; ++b) {
3838 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3839#if USE_DEBUGGER
3840 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3841#endif
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003842 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003843 }
3844 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3845 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003846
Alp Toker763b9392014-02-28 09:42:41 +00003847#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003848#if OMP_40_ENABLED
3849 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3850 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3851 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3852 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3853#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003854 if (TCR_4(__kmp_init_middle)) {
3855 __kmp_affinity_set_init_mask(gtid, TRUE);
3856 }
Alp Toker763b9392014-02-28 09:42:41 +00003857#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton92ca6182018-09-07 18:25:49 +00003858#if OMP_50_ENABLED
3859 root_thread->th.th_def_allocator = __kmp_def_allocator;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00003860 root_thread->th.th_prev_level = 0;
3861 root_thread->th.th_prev_num_threads = 1;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00003862#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003863
Jonathan Peyton30419822017-05-12 18:01:32 +00003864 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003865
Joachim Protze82e94a52017-11-01 10:08:30 +00003866#if OMPT_SUPPORT
3867 if (!initial_thread && ompt_enabled.enabled) {
3868
Joachim Protze489cdb72018-09-10 14:34:54 +00003869 kmp_info_t *root_thread = ompt_get_thread();
Joachim Protze82e94a52017-11-01 10:08:30 +00003870
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003871 ompt_set_thread_state(root_thread, ompt_state_overhead);
Joachim Protze82e94a52017-11-01 10:08:30 +00003872
3873 if (ompt_enabled.ompt_callback_thread_begin) {
3874 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3875 ompt_thread_initial, __ompt_get_thread_data_internal());
3876 }
3877 ompt_data_t *task_data;
3878 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3879 if (ompt_enabled.ompt_callback_task_create) {
3880 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3881 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3882 // initial task has nothing to return to
3883 }
3884
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003885 ompt_set_thread_state(root_thread, ompt_state_work_serial);
Joachim Protze82e94a52017-11-01 10:08:30 +00003886 }
3887#endif
3888
Jonathan Peyton30419822017-05-12 18:01:32 +00003889 KMP_MB();
3890 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003891
Jonathan Peyton30419822017-05-12 18:01:32 +00003892 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003893}
3894
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003895#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003896static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3897 const int max_level) {
3898 int i, n, nth;
3899 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3900 if (!hot_teams || !hot_teams[level].hot_team) {
3901 return 0;
3902 }
3903 KMP_DEBUG_ASSERT(level < max_level);
3904 kmp_team_t *team = hot_teams[level].hot_team;
3905 nth = hot_teams[level].hot_team_nth;
3906 n = nth - 1; // master is not freed
3907 if (level < max_level - 1) {
3908 for (i = 0; i < nth; ++i) {
3909 kmp_info_t *th = team->t.t_threads[i];
3910 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3911 if (i > 0 && th->th.th_hot_teams) {
3912 __kmp_free(th->th.th_hot_teams);
3913 th->th.th_hot_teams = NULL;
3914 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003915 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003916 }
3917 __kmp_free_team(root, team, NULL);
3918 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003919}
3920#endif
3921
Jonathan Peyton30419822017-05-12 18:01:32 +00003922// Resets a root thread and clear its root and hot teams.
3923// Returns the number of __kmp_threads entries directly and indirectly freed.
3924static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3925 kmp_team_t *root_team = root->r.r_root_team;
3926 kmp_team_t *hot_team = root->r.r_hot_team;
3927 int n = hot_team->t.t_nproc;
3928 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003929
Jonathan Peyton30419822017-05-12 18:01:32 +00003930 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003931
Jonathan Peyton30419822017-05-12 18:01:32 +00003932 root->r.r_root_team = NULL;
3933 root->r.r_hot_team = NULL;
3934 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3935 // before call to __kmp_free_team().
3936 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003937#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003938 if (__kmp_hot_teams_max_level >
3939 0) { // need to free nested hot teams and their threads if any
3940 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3941 kmp_info_t *th = hot_team->t.t_threads[i];
3942 if (__kmp_hot_teams_max_level > 1) {
3943 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3944 }
3945 if (th->th.th_hot_teams) {
3946 __kmp_free(th->th.th_hot_teams);
3947 th->th.th_hot_teams = NULL;
3948 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003949 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003950 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003951#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003952 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953
Jonathan Peyton30419822017-05-12 18:01:32 +00003954 // Before we can reap the thread, we need to make certain that all other
3955 // threads in the teams that had this root as ancestor have stopped trying to
3956 // steal tasks.
3957 if (__kmp_tasking_mode != tskm_immediate_exec) {
3958 __kmp_wait_to_unref_task_teams();
3959 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003960
Jonathan Peyton30419822017-05-12 18:01:32 +00003961#if KMP_OS_WINDOWS
3962 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3963 KA_TRACE(
3964 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3965 "\n",
3966 (LPVOID) & (root->r.r_uber_thread->th),
3967 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3968 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3969#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003970
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003971#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003972 if (ompt_enabled.ompt_callback_thread_end) {
3973 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3974 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
Jonathan Peyton30419822017-05-12 18:01:32 +00003975 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003976#endif
3977
Jonathan Peyton30419822017-05-12 18:01:32 +00003978 TCW_4(__kmp_nth,
3979 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
Jonathan Peytonf4392462017-07-27 20:58:41 +00003980 root->r.r_cg_nthreads--;
3981
Jonathan Peyton30419822017-05-12 18:01:32 +00003982 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003983
Jonathan Peyton30419822017-05-12 18:01:32 +00003984 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
3985 // instead of freeing it.
3986 root->r.r_uber_thread = NULL;
3987 /* mark root as no longer in use */
3988 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003989
Jonathan Peyton30419822017-05-12 18:01:32 +00003990 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003991}
3992
Jonathan Peyton30419822017-05-12 18:01:32 +00003993void __kmp_unregister_root_current_thread(int gtid) {
3994 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3995 /* this lock should be ok, since unregister_root_current_thread is never
3996 called during an abort, only during a normal close. furthermore, if you
3997 have the forkjoin lock, you should never try to get the initz lock */
3998 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3999 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4000 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4001 "exiting T#%d\n",
4002 gtid));
4003 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4004 return;
4005 }
4006 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00004007
Jonathan Peyton30419822017-05-12 18:01:32 +00004008 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4009 KMP_ASSERT(KMP_UBER_GTID(gtid));
4010 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4011 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004012
Jonathan Peyton30419822017-05-12 18:01:32 +00004013 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004014
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004015#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004016 kmp_info_t *thread = __kmp_threads[gtid];
4017 kmp_team_t *team = thread->th.th_team;
4018 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00004019
Jonathan Peyton30419822017-05-12 18:01:32 +00004020 // we need to wait for the proxy tasks before finishing the thread
4021 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00004022#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00004023 // the runtime is shutting down so we won't report any events
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00004024 thread->th.ompt_thread_info.state = ompt_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00004025#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004026 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4027 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00004028#endif
4029
Jonathan Peyton30419822017-05-12 18:01:32 +00004030 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004031
Jonathan Peyton30419822017-05-12 18:01:32 +00004032 /* free up this thread slot */
4033 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004034#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00004035 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004036#endif
4037
Jonathan Peyton30419822017-05-12 18:01:32 +00004038 KMP_MB();
4039 KC_TRACE(10,
4040 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004041
Jonathan Peyton30419822017-05-12 18:01:32 +00004042 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004043}
4044
Jonathan Peyton2321d572015-06-08 19:25:25 +00004045#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004046/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00004047 Unregisters a root thread that is not the current thread. Returns the number
4048 of __kmp_threads entries freed as a result. */
4049static int __kmp_unregister_root_other_thread(int gtid) {
4050 kmp_root_t *root = __kmp_root[gtid];
4051 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004052
Jonathan Peyton30419822017-05-12 18:01:32 +00004053 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4054 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4055 KMP_ASSERT(KMP_UBER_GTID(gtid));
4056 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4057 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004058
Jonathan Peyton30419822017-05-12 18:01:32 +00004059 r = __kmp_reset_root(gtid, root);
4060 KC_TRACE(10,
4061 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4062 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004063}
Jonathan Peyton2321d572015-06-08 19:25:25 +00004064#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004065
Jim Cownie5e8470a2013-09-27 10:38:44 +00004066#if KMP_DEBUG
4067void __kmp_task_info() {
4068
Jonathan Peyton30419822017-05-12 18:01:32 +00004069 kmp_int32 gtid = __kmp_entry_gtid();
4070 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4071 kmp_info_t *this_thr = __kmp_threads[gtid];
4072 kmp_team_t *steam = this_thr->th.th_serial_team;
4073 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004074
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00004075 __kmp_printf(
4076 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4077 "ptask=%p\n",
4078 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4079 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004080}
4081#endif // KMP_DEBUG
4082
Jonathan Peyton30419822017-05-12 18:01:32 +00004083/* TODO optimize with one big memclr, take out what isn't needed, split
4084 responsibility to workers as much as possible, and delay initialization of
4085 features as much as possible */
4086static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4087 int tid, int gtid) {
4088 /* this_thr->th.th_info.ds.ds_gtid is setup in
4089 kmp_allocate_thread/create_worker.
4090 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4091 kmp_info_t *master = team->t.t_threads[0];
4092 KMP_DEBUG_ASSERT(this_thr != NULL);
4093 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4094 KMP_DEBUG_ASSERT(team);
4095 KMP_DEBUG_ASSERT(team->t.t_threads);
4096 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4097 KMP_DEBUG_ASSERT(master);
4098 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004099
Jonathan Peyton30419822017-05-12 18:01:32 +00004100 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004101
Jonathan Peyton30419822017-05-12 18:01:32 +00004102 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004103
Jonathan Peyton30419822017-05-12 18:01:32 +00004104 this_thr->th.th_info.ds.ds_tid = tid;
4105 this_thr->th.th_set_nproc = 0;
4106 if (__kmp_tasking_mode != tskm_immediate_exec)
4107 // When tasking is possible, threads are not safe to reap until they are
4108 // done tasking; this will be set when tasking code is exited in wait
4109 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4110 else // no tasking --> always safe to reap
4111 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004112#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004113 this_thr->th.th_set_proc_bind = proc_bind_default;
4114#if KMP_AFFINITY_SUPPORTED
4115 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004116#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004117#endif
4118 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004119
Jonathan Peyton30419822017-05-12 18:01:32 +00004120 /* setup the thread's cache of the team structure */
4121 this_thr->th.th_team_nproc = team->t.t_nproc;
4122 this_thr->th.th_team_master = master;
4123 this_thr->th.th_team_serialized = team->t.t_serialized;
4124 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004125
Jonathan Peyton30419822017-05-12 18:01:32 +00004126 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004127
Jonathan Peyton30419822017-05-12 18:01:32 +00004128 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4129 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130
Jonathan Peyton30419822017-05-12 18:01:32 +00004131 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4132 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004133
Jonathan Peyton30419822017-05-12 18:01:32 +00004134 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4135 tid, gtid, this_thr, this_thr->th.th_current_task));
4136 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4137 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004138
Jonathan Peyton30419822017-05-12 18:01:32 +00004139 /* TODO no worksharing in speculative threads */
4140 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004141
Jonathan Peyton30419822017-05-12 18:01:32 +00004142 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004143
Jonathan Peyton30419822017-05-12 18:01:32 +00004144 if (!this_thr->th.th_pri_common) {
4145 this_thr->th.th_pri_common =
4146 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4147 if (__kmp_storage_map) {
4148 __kmp_print_storage_map_gtid(
4149 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4150 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004151 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004152 this_thr->th.th_pri_head = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004153 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004154
Jonathan Peyton30419822017-05-12 18:01:32 +00004155 /* Initialize dynamic dispatch */
4156 {
4157 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4158 // Use team max_nproc since this will never change for the team.
4159 size_t disp_size =
4160 sizeof(dispatch_private_info_t) *
4161 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
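    // A serialized team (t_max_nproc == 1) can only have one loop in flight, so
    // one buffer is enough; otherwise __kmp_dispatch_num_buffers buffers let
    // several dynamically scheduled (e.g. nowait) loops be active before a
    // buffer has to be reused.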
4162 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4163 team->t.t_max_nproc));
4164 KMP_ASSERT(dispatch);
4165 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4166 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004167
Jonathan Peyton30419822017-05-12 18:01:32 +00004168 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004169#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004170 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004171#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004172 if (!dispatch->th_disp_buffer) {
4173 dispatch->th_disp_buffer =
4174 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004175
Jonathan Peyton30419822017-05-12 18:01:32 +00004176 if (__kmp_storage_map) {
4177 __kmp_print_storage_map_gtid(
4178 gtid, &dispatch->th_disp_buffer[0],
4179 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4180 ? 1
4181 : __kmp_dispatch_num_buffers],
4182 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4183 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4184 gtid, team->t.t_id, gtid);
4185 }
4186 } else {
4187 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004188 }
4189
Jonathan Peyton30419822017-05-12 18:01:32 +00004190 dispatch->th_dispatch_pr_current = 0;
4191 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004192
Jonathan Peyton30419822017-05-12 18:01:32 +00004193 dispatch->th_deo_fcn = 0; /* ORDERED */
4194 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4195 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004196
Jonathan Peyton30419822017-05-12 18:01:32 +00004197 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004198
Jonathan Peyton30419822017-05-12 18:01:32 +00004199 if (!this_thr->th.th_task_state_memo_stack) {
4200 size_t i;
4201 this_thr->th.th_task_state_memo_stack =
4202 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4203 this_thr->th.th_task_state_top = 0;
4204 this_thr->th.th_task_state_stack_sz = 4;
4205 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4206 ++i) // zero init the stack
4207 this_thr->th.th_task_state_memo_stack[i] = 0;
4208 }
4209
4210 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4211 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4212
4213 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004214}
4215
Jonathan Peyton30419822017-05-12 18:01:32 +00004216/* allocate a new thread for the requesting team. this is only called from
4217 within a forkjoin critical section. we will first try to get an available
4218 thread from the thread pool. if none is available, we will fork a new one
4219 assuming we are able to create a new one. this should be assured, as the
4220 caller should check on this first. */
4221kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4222 int new_tid) {
4223 kmp_team_t *serial_team;
4224 kmp_info_t *new_thr;
4225 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004226
Jonathan Peyton30419822017-05-12 18:01:32 +00004227 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4228 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004229#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004230 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004231#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004232 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004233
Jonathan Peyton30419822017-05-12 18:01:32 +00004234 /* first, try to get one from the thread pool */
4235 if (__kmp_thread_pool) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004236
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004237 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00004238 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4239 if (new_thr == __kmp_thread_pool_insert_pt) {
4240 __kmp_thread_pool_insert_pt = NULL;
4241 }
4242 TCW_4(new_thr->th.th_in_pool, FALSE);
4243 // Don't touch th_active_in_pool or th_active.
4244 // The worker thread adjusts those flags as it sleeps/awakens.
4245 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004246
Jonathan Peyton30419822017-05-12 18:01:32 +00004247 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4248 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4249 KMP_ASSERT(!new_thr->th.th_team);
4250 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4251 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004252
Jonathan Peyton30419822017-05-12 18:01:32 +00004253 /* setup the thread structure */
4254 __kmp_initialize_info(new_thr, team, new_tid,
4255 new_thr->th.th_info.ds.ds_gtid);
4256 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004257
Jonathan Peyton30419822017-05-12 18:01:32 +00004258 TCW_4(__kmp_nth, __kmp_nth + 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00004259 root->r.r_cg_nthreads++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004260
Jonathan Peyton30419822017-05-12 18:01:32 +00004261 new_thr->th.th_task_state = 0;
4262 new_thr->th.th_task_state_top = 0;
4263 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004264
Jim Cownie5e8470a2013-09-27 10:38:44 +00004265#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004266 /* Adjust blocktime back to zero if necessary */
4267 /* Middle initialization might not have occurred yet */
4268 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4269 if (__kmp_nth > __kmp_avail_proc) {
4270 __kmp_zero_bt = TRUE;
4271 }
4272 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004273#endif /* KMP_ADJUST_BLOCKTIME */
4274
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004275#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004276 // If the thread entered the pool via __kmp_free_thread, wait_flag should not
4277 // equal KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004278 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004279 kmp_balign_t *balign = new_thr->th.th_bar;
4280 for (b = 0; b < bs_last_barrier; ++b)
4281 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004282#endif
4283
Jonathan Peyton30419822017-05-12 18:01:32 +00004284 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4285 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004286
Jim Cownie5e8470a2013-09-27 10:38:44 +00004287 KMP_MB();
4288 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004289 }
4290
4291 /* no, we'll fork a new one */
4292 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4293 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4294
4295#if KMP_USE_MONITOR
4296 // If this is the first worker thread the RTL is creating, then also
4297 // launch the monitor thread. We try to do this as early as possible.
4298 if (!TCR_4(__kmp_init_monitor)) {
4299 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4300 if (!TCR_4(__kmp_init_monitor)) {
4301 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4302 TCW_4(__kmp_init_monitor, 1);
4303 __kmp_create_monitor(&__kmp_monitor);
4304 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4305#if KMP_OS_WINDOWS
4306 // AC: wait until monitor has started. This is a fix for CQ232808.
4307 // The reason is that if the library is loaded/unloaded in a loop with
4308 // small (parallel) work in between, then there is high probability that
4309 // monitor thread started after the library shutdown. At shutdown it is
4310 // too late to cope with the problem, because when the master is in
4311 // DllMain (process detach) the monitor has no chances to start (it is
4312 // blocked), and master has no means to inform the monitor that the
4313 // library has gone, because all the memory which the monitor can access
4314 // is going to be released/reset.
4315 while (TCR_4(__kmp_init_monitor) < 2) {
4316 KMP_YIELD(TRUE);
4317 }
4318 KF_TRACE(10, ("after monitor thread has started\n"));
4319#endif
4320 }
4321 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4322 }
4323#endif
4324
4325 KMP_MB();
4326 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4327 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4328 }
4329
4330 /* allocate space for it. */
4331 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4332
4333 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4334
4335 if (__kmp_storage_map) {
4336 __kmp_print_thread_storage_map(new_thr, new_gtid);
4337 }
4338
4339 // add the reserve serialized team, initialized from the team's master thread
4340 {
4341 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4342 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4343 new_thr->th.th_serial_team = serial_team =
4344 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4345#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004346 ompt_data_none, // root parallel id
Jonathan Peyton30419822017-05-12 18:01:32 +00004347#endif
4348#if OMP_40_ENABLED
4349 proc_bind_default,
4350#endif
4351 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4352 }
4353 KMP_ASSERT(serial_team);
4354 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4355 // execution (it is unused for now).
4356 serial_team->t.t_threads[0] = new_thr;
4357 KF_TRACE(10,
4358 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4359 new_thr));
4360
4361 /* setup the thread structures */
4362 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4363
4364#if USE_FAST_MEMORY
4365 __kmp_initialize_fast_memory(new_thr);
4366#endif /* USE_FAST_MEMORY */
4367
4368#if KMP_USE_BGET
4369 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4370 __kmp_initialize_bget(new_thr);
4371#endif
4372
4373 __kmp_init_random(new_thr); // Initialize random number generator
4374
4375 /* Initialize these only once when thread is grabbed for a team allocation */
4376 KA_TRACE(20,
4377 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4378 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4379
4380 int b;
4381 kmp_balign_t *balign = new_thr->th.th_bar;
4382 for (b = 0; b < bs_last_barrier; ++b) {
4383 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4384 balign[b].bb.team = NULL;
4385 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4386 balign[b].bb.use_oncore_barrier = 0;
4387 }
4388
4389 new_thr->th.th_spin_here = FALSE;
4390 new_thr->th.th_next_waiting = 0;
Jonathan Peytona764af62018-07-19 19:17:00 +00004391#if KMP_OS_UNIX
4392 new_thr->th.th_blocking = false;
4393#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004394
4395#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4396 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4397 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4398 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4399 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4400#endif
Jonathan Peyton92ca6182018-09-07 18:25:49 +00004401#if OMP_50_ENABLED
4402 new_thr->th.th_def_allocator = __kmp_def_allocator;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004403 new_thr->th.th_prev_level = 0;
4404 new_thr->th.th_prev_num_threads = 1;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00004405#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004406
4407 TCW_4(new_thr->th.th_in_pool, FALSE);
4408 new_thr->th.th_active_in_pool = FALSE;
4409 TCW_4(new_thr->th.th_active, TRUE);
4410
4411 /* adjust the global counters */
4412 __kmp_all_nth++;
4413 __kmp_nth++;
4414
Jonathan Peytonf4392462017-07-27 20:58:41 +00004415 root->r.r_cg_nthreads++;
4416
Jonathan Peyton30419822017-05-12 18:01:32 +00004417 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4418 // numbers of procs, and method #2 (keyed API call) for higher numbers.
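// The stack (sp) search is cheap while only a few threads exist, but the
// keyed TLS lookup scales better, so switch methods once the total thread
// count reaches __kmp_tls_gtid_min.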
4419 if (__kmp_adjust_gtid_mode) {
4420 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4421 if (TCR_4(__kmp_gtid_mode) != 2) {
4422 TCW_4(__kmp_gtid_mode, 2);
4423 }
4424 } else {
4425 if (TCR_4(__kmp_gtid_mode) != 1) {
4426 TCW_4(__kmp_gtid_mode, 1);
4427 }
4428 }
4429 }
4430
4431#ifdef KMP_ADJUST_BLOCKTIME
4432 /* Adjust blocktime back to zero if necessary */
4433 /* Middle initialization might not have occurred yet */
4434 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4435 if (__kmp_nth > __kmp_avail_proc) {
4436 __kmp_zero_bt = TRUE;
4437 }
4438 }
4439#endif /* KMP_ADJUST_BLOCKTIME */
4440
4441 /* actually fork it and create the new worker thread */
4442 KF_TRACE(
4443 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4444 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4445 KF_TRACE(10,
4446 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4447
4448 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4449 new_gtid));
4450 KMP_MB();
4451 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004452}
4453
Jonathan Peyton30419822017-05-12 18:01:32 +00004454/* Reinitialize team for reuse.
4455 The hot team code calls this routine at every fork barrier, so EPCC barrier
4456 tests are extremely sensitive to changes in it, esp. writes to the team
4457 struct, which cause a cache invalidation in all threads.
4458 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4459static void __kmp_reinitialize_team(kmp_team_t *team,
4460 kmp_internal_control_t *new_icvs,
4461 ident_t *loc) {
4462 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4463 team->t.t_threads[0], team));
4464 KMP_DEBUG_ASSERT(team && new_icvs);
4465 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4466 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004467
Jonathan Peyton30419822017-05-12 18:01:32 +00004468 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004469 // Copy ICVs to the master thread's implicit taskdata
4470 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4471 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004472
Jonathan Peyton30419822017-05-12 18:01:32 +00004473 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4474 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004475}
4476
Jonathan Peyton30419822017-05-12 18:01:32 +00004477/* Initialize the team data structure.
4478 This assumes the t_threads and t_max_nproc are already set.
4479 Also, we don't touch the arguments */
4480static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4481 kmp_internal_control_t *new_icvs,
4482 ident_t *loc) {
4483 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004484
Jonathan Peyton30419822017-05-12 18:01:32 +00004485 /* verify */
4486 KMP_DEBUG_ASSERT(team);
4487 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4488 KMP_DEBUG_ASSERT(team->t.t_threads);
4489 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004490
Jonathan Peyton30419822017-05-12 18:01:32 +00004491 team->t.t_master_tid = 0; /* not needed */
4492 /* team->t.t_master_bar; not needed */
4493 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4494 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004495
Jonathan Peyton30419822017-05-12 18:01:32 +00004496 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4497 team->t.t_next_pool = NULL;
4498 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4499 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004500
Jonathan Peyton30419822017-05-12 18:01:32 +00004501 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4502 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004503
Jonathan Peyton30419822017-05-12 18:01:32 +00004504 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004505 team->t.t_sched.sched = new_icvs->sched.sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004506
4507#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004508 team->t.t_fp_control_saved = FALSE; /* not needed */
4509 team->t.t_x87_fpu_control_word = 0; /* not needed */
4510 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004511#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4512
Jonathan Peyton30419822017-05-12 18:01:32 +00004513 team->t.t_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004514
Jonathan Peyton30419822017-05-12 18:01:32 +00004515 team->t.t_ordered.dt.t_value = 0;
4516 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004517
Jonathan Peyton30419822017-05-12 18:01:32 +00004518 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004519
4520#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004521 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004522#endif
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004523#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00004524 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004525#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004526
Jonathan Peyton30419822017-05-12 18:01:32 +00004527 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004528
Jonathan Peyton30419822017-05-12 18:01:32 +00004529 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004530
Jonathan Peyton30419822017-05-12 18:01:32 +00004531 KMP_MB();
4532 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004533}
4534
Alp Toker98758b02014-03-02 04:12:06 +00004535#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004536/* Sets full mask for thread and returns old mask, no changes to structures. */
4537static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004538__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4539 if (KMP_AFFINITY_CAPABLE()) {
4540 int status;
4541 if (old_mask != NULL) {
4542 status = __kmp_get_system_affinity(old_mask, TRUE);
4543 int error = errno;
4544 if (status != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004545 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4546 __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004547 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004548 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004549 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4550 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004551}
4552#endif
4553
Alp Toker98758b02014-03-02 04:12:06 +00004554#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004555
Jim Cownie5e8470a2013-09-27 10:38:44 +00004556// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4557// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004558// thread's partition, and binds each worker to a place in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004559// The master thread's partition should already include its current binding.
Jonathan Peyton30419822017-05-12 18:01:32 +00004560static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4561 // Copy the master thread's place partion to the team struct
4562 kmp_info_t *master_th = team->t.t_threads[0];
4563 KMP_DEBUG_ASSERT(master_th != NULL);
4564 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4565 int first_place = master_th->th.th_first_place;
4566 int last_place = master_th->th.th_last_place;
4567 int masters_place = master_th->th.th_current_place;
4568 team->t.t_first_place = first_place;
4569 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004570
Jonathan Peyton30419822017-05-12 18:01:32 +00004571 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4572 "bound to place %d partition = [%d,%d]\n",
4573 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4574 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004575
Jonathan Peyton30419822017-05-12 18:01:32 +00004576 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004577
Jonathan Peyton30419822017-05-12 18:01:32 +00004578 case proc_bind_default:
4579 // serial teams might have the proc_bind policy set to proc_bind_default. It
4580 // doesn't matter, as we don't rebind the master thread for any proc_bind policy
4581 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4582 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004583
Jonathan Peyton30419822017-05-12 18:01:32 +00004584 case proc_bind_master: {
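// Every worker inherits the master's full place partition but is bound to
// the master's own place.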
4585 int f;
4586 int n_th = team->t.t_nproc;
4587 for (f = 1; f < n_th; f++) {
4588 kmp_info_t *th = team->t.t_threads[f];
4589 KMP_DEBUG_ASSERT(th != NULL);
4590 th->th.th_first_place = first_place;
4591 th->th.th_last_place = last_place;
4592 th->th.th_new_place = masters_place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004593#if OMP_50_ENABLED
4594 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4595 team->t.t_display_affinity != 1) {
4596 team->t.t_display_affinity = 1;
4597 }
4598#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004599
Jonathan Peyton30419822017-05-12 18:01:32 +00004600 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4601 "partition = [%d,%d]\n",
4602 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4603 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004604 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004605 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004606
Jonathan Peyton30419822017-05-12 18:01:32 +00004607 case proc_bind_close: {
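// Workers are bound to places as close to the master's place as possible,
// walking the partition in order and wrapping around at its end.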
4608 int f;
4609 int n_th = team->t.t_nproc;
4610 int n_places;
4611 if (first_place <= last_place) {
4612 n_places = last_place - first_place + 1;
4613 } else {
4614 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4615 }
4616 if (n_th <= n_places) {
4617 int place = masters_place;
4618 for (f = 1; f < n_th; f++) {
4619 kmp_info_t *th = team->t.t_threads[f];
4620 KMP_DEBUG_ASSERT(th != NULL);
4621
4622 if (place == last_place) {
4623 place = first_place;
4624 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4625 place = 0;
4626 } else {
4627 place++;
4628 }
4629 th->th.th_first_place = first_place;
4630 th->th.th_last_place = last_place;
4631 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004632#if OMP_50_ENABLED
4633 if (__kmp_display_affinity && place != th->th.th_current_place &&
4634 team->t.t_display_affinity != 1) {
4635 team->t.t_display_affinity = 1;
4636 }
4637#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004638
4639 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4640 "partition = [%d,%d]\n",
4641 __kmp_gtid_from_thread(team->t.t_threads[f]),
4642 team->t.t_id, f, place, first_place, last_place));
4643 }
4644 } else {
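// More threads than places: pack S = n_th / n_places threads into each place
// and give one extra thread to every gap-th place until the remainder rem is
// used up. For example, n_th = 10 over n_places = 4 gives S = 2, rem = 2,
// gap = 2, so the places receive 3, 2, 3, 2 threads starting at the
// master's place.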
4645 int S, rem, gap, s_count;
4646 S = n_th / n_places;
4647 s_count = 0;
4648 rem = n_th - (S * n_places);
4649 gap = rem > 0 ? n_places / rem : n_places;
4650 int place = masters_place;
4651 int gap_ct = gap;
4652 for (f = 0; f < n_th; f++) {
4653 kmp_info_t *th = team->t.t_threads[f];
4654 KMP_DEBUG_ASSERT(th != NULL);
4655
4656 th->th.th_first_place = first_place;
4657 th->th.th_last_place = last_place;
4658 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004659#if OMP_50_ENABLED
4660 if (__kmp_display_affinity && place != th->th.th_current_place &&
4661 team->t.t_display_affinity != 1) {
4662 team->t.t_display_affinity = 1;
4663 }
4664#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004665 s_count++;
4666
4667 if ((s_count == S) && rem && (gap_ct == gap)) {
4668 // do nothing, add an extra thread to place on next iteration
4669 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4670 // we added an extra thread to this place; move to next place
4671 if (place == last_place) {
4672 place = first_place;
4673 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4674 place = 0;
4675 } else {
4676 place++;
4677 }
4678 s_count = 0;
4679 gap_ct = 1;
4680 rem--;
4681 } else if (s_count == S) { // place full; don't add extra
4682 if (place == last_place) {
4683 place = first_place;
4684 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4685 place = 0;
4686 } else {
4687 place++;
4688 }
4689 gap_ct++;
4690 s_count = 0;
4691 }
4692
4693 KA_TRACE(100,
4694 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4695 "partition = [%d,%d]\n",
4696 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4697 th->th.th_new_place, first_place, last_place));
4698 }
4699 KMP_DEBUG_ASSERT(place == masters_place);
4700 }
4701 } break;
4702
4703 case proc_bind_spread: {
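// Threads are spread out over the master's partition: each thread receives
// its own disjoint sub-partition of places, or a single place when there are
// more threads than places.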
4704 int f;
4705 int n_th = team->t.t_nproc;
4706 int n_places;
4707 int thidx;
4708 if (first_place <= last_place) {
4709 n_places = last_place - first_place + 1;
4710 } else {
4711 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4712 }
4713 if (n_th <= n_places) {
Paul Osmialowskia0162792017-08-10 23:04:11 +00004714 int place = -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004715
Paul Osmialowskia0162792017-08-10 23:04:11 +00004716 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4717 int S = n_places / n_th;
4718 int s_count, rem, gap, gap_ct;
4719
4720 place = masters_place;
4721 rem = n_places - n_th * S;
4722 gap = rem ? n_th / rem : 1;
4723 gap_ct = gap;
4724 thidx = n_th;
4725 if (update_master_only == 1)
4726 thidx = 1;
4727 for (f = 0; f < thidx; f++) {
4728 kmp_info_t *th = team->t.t_threads[f];
4729 KMP_DEBUG_ASSERT(th != NULL);
4730
4731 th->th.th_first_place = place;
4732 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004733#if OMP_50_ENABLED
4734 if (__kmp_display_affinity && place != th->th.th_current_place &&
4735 team->t.t_display_affinity != 1) {
4736 team->t.t_display_affinity = 1;
4737 }
4738#endif
Paul Osmialowskia0162792017-08-10 23:04:11 +00004739 s_count = 1;
4740 while (s_count < S) {
4741 if (place == last_place) {
4742 place = first_place;
4743 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4744 place = 0;
4745 } else {
4746 place++;
4747 }
4748 s_count++;
4749 }
4750 if (rem && (gap_ct == gap)) {
4751 if (place == last_place) {
4752 place = first_place;
4753 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4754 place = 0;
4755 } else {
4756 place++;
4757 }
4758 rem--;
4759 gap_ct = 0;
4760 }
4761 th->th.th_last_place = place;
4762 gap_ct++;
4763
Jonathan Peyton30419822017-05-12 18:01:32 +00004764 if (place == last_place) {
4765 place = first_place;
4766 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4767 place = 0;
4768 } else {
4769 place++;
4770 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004771
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004772 KA_TRACE(100,
4773 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4774 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4775 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4776 f, th->th.th_new_place, th->th.th_first_place,
4777 th->th.th_last_place, __kmp_affinity_num_masks));
Jonathan Peyton30419822017-05-12 18:01:32 +00004778 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004779 } else {
4780 /* Having uniform space of available computation places I can create
4781 T partitions of round(P/T) size and put threads into the first
4782 place of each partition. */
4783 double current = static_cast<double>(masters_place);
4784 double spacing =
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004785 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004786 int first, last;
4787 kmp_info_t *th;
4788
4789 thidx = n_th + 1;
4790 if (update_master_only == 1)
4791 thidx = 1;
4792 for (f = 0; f < thidx; f++) {
4793 first = static_cast<int>(current);
4794 last = static_cast<int>(current + spacing) - 1;
4795 KMP_DEBUG_ASSERT(last >= first);
4796 if (first >= n_places) {
4797 if (masters_place) {
4798 first -= n_places;
4799 last -= n_places;
4800 if (first == (masters_place + 1)) {
4801 KMP_DEBUG_ASSERT(f == n_th);
4802 first--;
4803 }
4804 if (last == masters_place) {
4805 KMP_DEBUG_ASSERT(f == (n_th - 1));
4806 last--;
4807 }
4808 } else {
4809 KMP_DEBUG_ASSERT(f == n_th);
4810 first = 0;
4811 last = 0;
4812 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004813 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004814 if (last >= n_places) {
4815 last = (n_places - 1);
4816 }
4817 place = first;
4818 current += spacing;
4819 if (f < n_th) {
4820 KMP_DEBUG_ASSERT(0 <= first);
4821 KMP_DEBUG_ASSERT(n_places > first);
4822 KMP_DEBUG_ASSERT(0 <= last);
4823 KMP_DEBUG_ASSERT(n_places > last);
4824 KMP_DEBUG_ASSERT(last_place >= first_place);
4825 th = team->t.t_threads[f];
4826 KMP_DEBUG_ASSERT(th);
4827 th->th.th_first_place = first;
4828 th->th.th_new_place = place;
4829 th->th.th_last_place = last;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004830#if OMP_50_ENABLED
4831 if (__kmp_display_affinity && place != th->th.th_current_place &&
4832 team->t.t_display_affinity != 1) {
4833 team->t.t_display_affinity = 1;
4834 }
4835#endif
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004836 KA_TRACE(100,
4837 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4838 "partition = [%d,%d], spacing = %.4f\n",
4839 __kmp_gtid_from_thread(team->t.t_threads[f]),
4840 team->t.t_id, f, th->th.th_new_place,
4841 th->th.th_first_place, th->th.th_last_place, spacing));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004842 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004843 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004844 }
4845 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4846 } else {
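// More threads than places: use the same S/rem/gap packing as
// proc_bind_close, but narrow each thread's partition to the single place it
// is packed into.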
4847 int S, rem, gap, s_count;
4848 S = n_th / n_places;
4849 s_count = 0;
4850 rem = n_th - (S * n_places);
4851 gap = rem > 0 ? n_places / rem : n_places;
4852 int place = masters_place;
4853 int gap_ct = gap;
4854 thidx = n_th;
4855 if (update_master_only == 1)
4856 thidx = 1;
4857 for (f = 0; f < thidx; f++) {
4858 kmp_info_t *th = team->t.t_threads[f];
4859 KMP_DEBUG_ASSERT(th != NULL);
4860
4861 th->th.th_first_place = place;
4862 th->th.th_last_place = place;
4863 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004864#if OMP_50_ENABLED
4865 if (__kmp_display_affinity && place != th->th.th_current_place &&
4866 team->t.t_display_affinity != 1) {
4867 team->t.t_display_affinity = 1;
4868 }
4869#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004870 s_count++;
4871
4872 if ((s_count == S) && rem && (gap_ct == gap)) {
4873 // do nothing, add an extra thread to place on next iteration
4874 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4875 // we added an extra thread to this place; move on to next place
4876 if (place == last_place) {
4877 place = first_place;
4878 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4879 place = 0;
4880 } else {
4881 place++;
4882 }
4883 s_count = 0;
4884 gap_ct = 1;
4885 rem--;
4886 } else if (s_count == S) { // place is full; don't add extra thread
4887 if (place == last_place) {
4888 place = first_place;
4889 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4890 place = 0;
4891 } else {
4892 place++;
4893 }
4894 gap_ct++;
4895 s_count = 0;
4896 }
4897
4898 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4899 "partition = [%d,%d]\n",
4900 __kmp_gtid_from_thread(team->t.t_threads[f]),
4901 team->t.t_id, f, th->th.th_new_place,
4902 th->th.th_first_place, th->th.th_last_place));
4903 }
4904 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4905 }
4906 } break;
4907
4908 default:
4909 break;
4910 }
4911
4912 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004913}
4914
Alp Toker98758b02014-03-02 04:12:06 +00004915#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004916
Jonathan Peyton30419822017-05-12 18:01:32 +00004917/* allocate a new team data structure to use. take one off of the free pool if
4918 available */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004919kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004920__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004921#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004922 ompt_data_t ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004923#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004924#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004925 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004926#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004927 kmp_internal_control_t *new_icvs,
4928 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4929 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4930 int f;
4931 kmp_team_t *team;
4932 int use_hot_team = !root->r.r_active;
4933 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004934
Jonathan Peyton30419822017-05-12 18:01:32 +00004935 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4936 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4937 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4938 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004939
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004940#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004941 kmp_hot_team_ptr_t *hot_teams;
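// Work out the nesting level this team will run at so the hot team cached
// for that level can be looked up; a teams construct may shift the level by
// one, as handled below.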
4942 if (master) {
4943 team = master->th.th_team;
4944 level = team->t.t_active_level;
4945 if (master->th.th_teams_microtask) { // in teams construct?
4946 if (master->th.th_teams_size.nteams > 1 &&
4947 ( // #teams > 1
4948 team->t.t_pkfn ==
4949 (microtask_t)__kmp_teams_master || // inner fork of the teams
4950 master->th.th_teams_level <
4951 team->t.t_level)) { // or nested parallel inside the teams
4952 ++level; // not increment if #teams==1, or for outer fork of the teams;
4953 // increment otherwise
4954 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004955 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004956 hot_teams = master->th.th_hot_teams;
4957 if (level < __kmp_hot_teams_max_level && hot_teams &&
4958 hot_teams[level]
4959 .hot_team) { // hot team has already been allocated for given level
4960 use_hot_team = 1;
4961 } else {
4962 use_hot_team = 0;
4963 }
4964 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004965#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004966 // Optimization to use a "hot" team
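// A hot team is kept alive between parallel regions so its threads and data
// structures can be reused instead of being re-allocated on every fork.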
4967 if (use_hot_team && new_nproc > 1) {
4968 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004969#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004970 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004971#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004972 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004973#endif
4974#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004975 if (__kmp_tasking_mode != tskm_immediate_exec) {
4976 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4977 "task_team[1] = %p before reinit\n",
4978 team->t.t_task_team[0], team->t.t_task_team[1]));
4979 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004980#endif
4981
Jonathan Peyton30419822017-05-12 18:01:32 +00004982 // Has the number of threads changed?
4983 /* Let's assume the most common case is that the number of threads is
4984 unchanged, and put that case first. */
4985 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4986 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4987 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004988 // team size was already reduced, so we check the special flag
Jonathan Peyton30419822017-05-12 18:01:32 +00004989 if (team->t.t_size_changed == -1) {
4990 team->t.t_size_changed = 1;
4991 } else {
4992 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4993 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004994
Jonathan Peyton30419822017-05-12 18:01:32 +00004995 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4996 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004997 // set master's schedule as new run-time schedule
4998 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004999
Jonathan Peyton30419822017-05-12 18:01:32 +00005000 __kmp_reinitialize_team(team, new_icvs,
5001 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005002
Jonathan Peyton30419822017-05-12 18:01:32 +00005003 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5004 team->t.t_threads[0], team));
5005 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005006
5007#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005008#if KMP_AFFINITY_SUPPORTED
5009 if ((team->t.t_size_changed == 0) &&
5010 (team->t.t_proc_bind == new_proc_bind)) {
5011 if (new_proc_bind == proc_bind_spread) {
5012 __kmp_partition_places(
5013 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005014 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005015 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5016 "proc_bind = %d, partition = [%d,%d]\n",
5017 team->t.t_id, new_proc_bind, team->t.t_first_place,
5018 team->t.t_last_place));
5019 } else {
5020 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5021 __kmp_partition_places(team);
5022 }
5023#else
5024 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5025#endif /* KMP_AFFINITY_SUPPORTED */
5026#endif /* OMP_40_ENABLED */
5027 } else if (team->t.t_nproc > new_nproc) {
5028 KA_TRACE(20,
5029 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5030 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005031
Jonathan Peyton30419822017-05-12 18:01:32 +00005032 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005033#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005034 if (__kmp_hot_teams_mode == 0) {
5035 // AC: saved number of threads should correspond to team's value in this
5036 // mode; it can be bigger in mode 1, when the hot team has threads in reserve
5037 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5038 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005039#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005040 /* release the extra threads we don't need any more */
5041 for (f = new_nproc; f < team->t.t_nproc; f++) {
5042 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5043 if (__kmp_tasking_mode != tskm_immediate_exec) {
5044 // When decreasing team size, threads no longer in the team should
5045 // unref task team.
5046 team->t.t_threads[f]->th.th_task_team = NULL;
5047 }
5048 __kmp_free_thread(team->t.t_threads[f]);
5049 team->t.t_threads[f] = NULL;
5050 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005051#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005052 } // (__kmp_hot_teams_mode == 0)
5053 else {
5054 // When keeping extra threads in team, switch threads to wait on own
5055 // b_go flag
5056 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5057 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5058 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5059 for (int b = 0; b < bs_last_barrier; ++b) {
5060 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5061 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005062 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005063 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5064 }
5065 }
5066 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005067#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005068 team->t.t_nproc = new_nproc;
5069 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00005070 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
Jonathan Peyton30419822017-05-12 18:01:32 +00005071 __kmp_reinitialize_team(team, new_icvs,
5072 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005073
Jonathan Peyton30419822017-05-12 18:01:32 +00005074 /* update the remaining threads */
5075 for (f = 0; f < new_nproc; ++f) {
5076 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5077 }
5078 // restore the current task state of the master thread: should be the
5079 // implicit task
5080 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5081 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005082
Jonathan Peyton30419822017-05-12 18:01:32 +00005083 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005084
5085#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00005086 for (f = 0; f < team->t.t_nproc; f++) {
5087 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5088 team->t.t_threads[f]->th.th_team_nproc ==
5089 team->t.t_nproc);
5090 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005091#endif
5092
5093#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005094 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5095#if KMP_AFFINITY_SUPPORTED
5096 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005097#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005098#endif
5099 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00005100#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005101 kmp_affin_mask_t *old_mask;
5102 if (KMP_AFFINITY_CAPABLE()) {
5103 KMP_CPU_ALLOC(old_mask);
5104 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005105#endif
5106
Jonathan Peyton30419822017-05-12 18:01:32 +00005107 KA_TRACE(20,
5108 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5109 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005110
Jonathan Peyton30419822017-05-12 18:01:32 +00005111 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005112
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005113#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005114 int avail_threads = hot_teams[level].hot_team_nth;
5115 if (new_nproc < avail_threads)
5116 avail_threads = new_nproc;
5117 kmp_info_t **other_threads = team->t.t_threads;
5118 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5119 // Adjust barrier data of reserved threads (if any) of the team
5120 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005121 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00005122 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5123 for (b = 0; b < bs_last_barrier; ++b) {
5124 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5125 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005126#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00005127 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005128#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005129 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005130 }
5131 if (hot_teams[level].hot_team_nth >= new_nproc) {
5132 // we have all needed threads in reserve, no need to allocate any
5133 // this is only possible in mode 1; there cannot be reserved threads in mode 0
5134 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5135 team->t.t_nproc = new_nproc; // just get reserved threads involved
5136 } else {
5137 // we may have some threads in reserve, but not enough
5138 team->t.t_nproc =
5139 hot_teams[level]
5140 .hot_team_nth; // get reserved threads involved if any
5141 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5142#endif // KMP_NESTED_HOT_TEAMS
5143 if (team->t.t_max_nproc < new_nproc) {
5144 /* reallocate larger arrays */
5145 __kmp_reallocate_team_arrays(team, new_nproc);
5146 __kmp_reinitialize_team(team, new_icvs, NULL);
5147 }
5148
5149#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5150 /* Temporarily set full mask for master thread before creation of
5151 workers. The reason is that workers inherit the affinity from the master,
5152 so if a lot of workers are created on a single core quickly, they
5153 don't get a chance to set their own affinity for a long time. */
5154 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5155#endif
5156
5157 /* allocate new threads for the hot team */
5158 for (f = team->t.t_nproc; f < new_nproc; f++) {
5159 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5160 KMP_DEBUG_ASSERT(new_worker);
5161 team->t.t_threads[f] = new_worker;
5162
5163 KA_TRACE(20,
5164 ("__kmp_allocate_team: team %d init T#%d arrived: "
5165 "join=%llu, plain=%llu\n",
5166 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5167 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5168 team->t.t_bar[bs_plain_barrier].b_arrived));
5169
5170 { // Initialize barrier data for new threads.
5171 int b;
5172 kmp_balign_t *balign = new_worker->th.th_bar;
5173 for (b = 0; b < bs_last_barrier; ++b) {
5174 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5175 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5176 KMP_BARRIER_PARENT_FLAG);
5177#if USE_DEBUGGER
5178 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5179#endif
5180 }
5181 }
5182 }
5183
5184#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5185 if (KMP_AFFINITY_CAPABLE()) {
5186 /* Restore initial master thread's affinity mask */
5187 __kmp_set_system_affinity(old_mask, TRUE);
5188 KMP_CPU_FREE(old_mask);
5189 }
5190#endif
5191#if KMP_NESTED_HOT_TEAMS
5192 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5193#endif // KMP_NESTED_HOT_TEAMS
5194 /* make sure everyone is synchronized */
5195 int old_nproc = team->t.t_nproc; // save old value and use to update only
5196 // new threads below
5197 __kmp_initialize_team(team, new_nproc, new_icvs,
5198 root->r.r_uber_thread->th.th_ident);
5199
5200 /* reinitialize the threads */
5201 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5202 for (f = 0; f < team->t.t_nproc; ++f)
5203 __kmp_initialize_info(team->t.t_threads[f], team, f,
5204 __kmp_gtid_from_tid(f, team));
5205 if (level) { // set th_task_state for new threads in nested hot team
5206 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5207 // only need to set the th_task_state for the new threads. th_task_state
5208 // for master thread will not be accurate until after this in
5209 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5210 // correct value.
5211 for (f = old_nproc; f < team->t.t_nproc; ++f)
5212 team->t.t_threads[f]->th.th_task_state =
5213 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5214 } else { // set th_task_state for new threads in non-nested hot team
5215 int old_state =
5216 team->t.t_threads[0]->th.th_task_state; // copy master's state
5217 for (f = old_nproc; f < team->t.t_nproc; ++f)
5218 team->t.t_threads[f]->th.th_task_state = old_state;
5219 }
5220
5221#ifdef KMP_DEBUG
5222 for (f = 0; f < team->t.t_nproc; ++f) {
5223 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5224 team->t.t_threads[f]->th.th_team_nproc ==
5225 team->t.t_nproc);
5226 }
5227#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005228
5229#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005230 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5231#if KMP_AFFINITY_SUPPORTED
5232 __kmp_partition_places(team);
5233#endif
5234#endif
5235 } // Check changes in number of threads
5236
5237#if OMP_40_ENABLED
5238 kmp_info_t *master = team->t.t_threads[0];
5239 if (master->th.th_teams_microtask) {
5240 for (f = 1; f < new_nproc; ++f) {
5241 // propagate teams construct specific info to workers
5242 kmp_info_t *thr = team->t.t_threads[f];
5243 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5244 thr->th.th_teams_level = master->th.th_teams_level;
5245 thr->th.th_teams_size = master->th.th_teams_size;
5246 }
5247 }
5248#endif /* OMP_40_ENABLED */
5249#if KMP_NESTED_HOT_TEAMS
5250 if (level) {
5251 // Sync barrier state for nested hot teams, not needed for outermost hot
5252 // team.
5253 for (f = 1; f < new_nproc; ++f) {
5254 kmp_info_t *thr = team->t.t_threads[f];
5255 int b;
5256 kmp_balign_t *balign = thr->th.th_bar;
5257 for (b = 0; b < bs_last_barrier; ++b) {
5258 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5259 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5260#if USE_DEBUGGER
5261 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5262#endif
5263 }
5264 }
5265 }
5266#endif // KMP_NESTED_HOT_TEAMS
5267
5268 /* reallocate space for arguments if necessary */
5269 __kmp_alloc_argv_entries(argc, team, TRUE);
5270 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5271 // The hot team re-uses the previous task team,
5272 // if untouched during the previous release->gather phase.
5273
5274 KF_TRACE(10, (" hot_team = %p\n", team));
5275
5276#if KMP_DEBUG
5277 if (__kmp_tasking_mode != tskm_immediate_exec) {
5278 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5279 "task_team[1] = %p after reinit\n",
5280 team->t.t_task_team[0], team->t.t_task_team[1]));
5281 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005282#endif
5283
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005284#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005285 __ompt_team_assign_id(team, ompt_parallel_data);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005286#endif
5287
Jim Cownie5e8470a2013-09-27 10:38:44 +00005288 KMP_MB();
5289
Jim Cownie5e8470a2013-09-27 10:38:44 +00005290 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005291 }
5292
5293 /* next, let's try to take one from the team pool */
5294 KMP_MB();
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005295 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005296 /* TODO: consider resizing undersized teams instead of reaping them, now
5297 that we have a resizing mechanism */
5298 if (team->t.t_max_nproc >= max_nproc) {
5299 /* take this team from the team pool */
5300 __kmp_team_pool = team->t.t_next_pool;
5301
5302 /* setup the team for fresh use */
5303 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5304
5305 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5306 "task_team[1] %p to NULL\n",
5307 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5308 team->t.t_task_team[0] = NULL;
5309 team->t.t_task_team[1] = NULL;
5310
5311 /* reallocate space for arguments if necessary */
5312 __kmp_alloc_argv_entries(argc, team, TRUE);
5313 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5314
5315 KA_TRACE(
5316 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5317 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5318 { // Initialize barrier data.
5319 int b;
5320 for (b = 0; b < bs_last_barrier; ++b) {
5321 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5322#if USE_DEBUGGER
5323 team->t.t_bar[b].b_master_arrived = 0;
5324 team->t.t_bar[b].b_team_arrived = 0;
5325#endif
5326 }
5327 }
5328
5329#if OMP_40_ENABLED
5330 team->t.t_proc_bind = new_proc_bind;
5331#endif
5332
5333 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5334 team->t.t_id));
5335
5336#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005337 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005338#endif
5339
5340 KMP_MB();
5341
5342 return team;
5343 }
5344
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005345 /* reap team if it is too small, then loop back and check the next one */
5346 // not sure if this is wise, but this will be redone during the hot-teams
5347 // rewrite.
5348 /* TODO: Use technique to find the right size hot-team, don't reap them */
Jonathan Peyton30419822017-05-12 18:01:32 +00005349 team = __kmp_reap_team(team);
5350 __kmp_team_pool = team;
5351 }
5352
5353 /* nothing available in the pool, no matter, make a new team! */
5354 KMP_MB();
5355 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5356
5357 /* and set it up */
5358 team->t.t_max_nproc = max_nproc;
5359 /* NOTE well, for some reason allocating one big buffer and dividing it up
5360 seems to really hurt performance a lot on the P4, so let's not use this */
5361 __kmp_allocate_team_arrays(team, max_nproc);
5362
5363 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5364 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5365
5366 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5367 "%p to NULL\n",
5368 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5369 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5370 // memory, no need to duplicate
5371 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5372 // memory, no need to duplicate
5373
5374 if (__kmp_storage_map) {
5375 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5376 }
5377
5378 /* allocate space for arguments */
5379 __kmp_alloc_argv_entries(argc, team, FALSE);
5380 team->t.t_argc = argc;
5381
5382 KA_TRACE(20,
5383 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5384 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5385 { // Initialize barrier data.
5386 int b;
5387 for (b = 0; b < bs_last_barrier; ++b) {
5388 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5389#if USE_DEBUGGER
5390 team->t.t_bar[b].b_master_arrived = 0;
5391 team->t.t_bar[b].b_team_arrived = 0;
5392#endif
5393 }
5394 }
5395
5396#if OMP_40_ENABLED
5397 team->t.t_proc_bind = new_proc_bind;
5398#endif
5399
5400#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005401 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005402 team->t.ompt_serialized_team_info = NULL;
5403#endif
5404
5405 KMP_MB();
5406
5407 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5408 team->t.t_id));
5409
5410 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005411}
5412
5413/* TODO implement hot-teams at all levels */
5414/* TODO implement lazy thread release on demand (disband request) */
5415
5416/* free the team. return it to the team pool. release all the threads
5417 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005418void __kmp_free_team(kmp_root_t *root,
5419 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5420 int f;
5421 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5422 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005423
Jonathan Peyton30419822017-05-12 18:01:32 +00005424 /* verify state */
5425 KMP_DEBUG_ASSERT(root);
5426 KMP_DEBUG_ASSERT(team);
5427 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5428 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005429
Jonathan Peyton30419822017-05-12 18:01:32 +00005430 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005431#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005432 int level;
5433 kmp_hot_team_ptr_t *hot_teams;
5434 if (master) {
5435 level = team->t.t_active_level - 1;
5436 if (master->th.th_teams_microtask) { // in teams construct?
5437 if (master->th.th_teams_size.nteams > 1) {
5438 ++level; // level was not increased in teams construct for
5439 // team_of_masters
5440 }
5441 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5442 master->th.th_teams_level == team->t.t_level) {
5443 ++level; // level was not increased in teams construct for
5444 // team_of_workers before the parallel
5445 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005446 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005447 hot_teams = master->th.th_hot_teams;
5448 if (level < __kmp_hot_teams_max_level) {
5449 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5450 use_hot_team = 1;
5451 }
5452 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005453#endif // KMP_NESTED_HOT_TEAMS
5454
Jonathan Peyton30419822017-05-12 18:01:32 +00005455 /* team is done working */
5456 TCW_SYNC_PTR(team->t.t_pkfn,
5457 NULL); // Important for Debugging Support Library.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005458#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005459 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005460#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005461 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005462
Jonathan Peyton30419822017-05-12 18:01:32 +00005463 /* if we are non-hot team, release our threads */
5464 if (!use_hot_team) {
5465 if (__kmp_tasking_mode != tskm_immediate_exec) {
5466 // Wait for threads to reach reapable state
5467 for (f = 1; f < team->t.t_nproc; ++f) {
5468 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5469 kmp_info_t *th = team->t.t_threads[f];
5470 volatile kmp_uint32 *state = &th->th.th_reap_state;
5471 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005472#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005473 // On Windows a thread can be killed at any time, check this
5474 DWORD ecode;
5475 if (!__kmp_is_thread_alive(th, &ecode)) {
5476 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5477 break;
5478 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005479#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005480 // first check if thread is sleeping
5481 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5482 if (fl.is_sleeping())
5483 fl.resume(__kmp_gtid_from_thread(th));
5484 KMP_CPU_PAUSE();
5485 }
5486 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005487
Jonathan Peyton30419822017-05-12 18:01:32 +00005488 // Delete task teams
5489 int tt_idx;
5490 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5491 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5492 if (task_team != NULL) {
5493 for (f = 0; f < team->t.t_nproc;
5494 ++f) { // Have all threads unref task teams
5495 team->t.t_threads[f]->th.th_task_team = NULL;
5496 }
5497 KA_TRACE(
5498 20,
5499 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5500 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005501#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005502 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005503#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005504 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005505 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005506 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005507 }
5508
Jonathan Peyton30419822017-05-12 18:01:32 +00005509 // Reset pointer to parent team only for non-hot teams.
5510 team->t.t_parent = NULL;
5511 team->t.t_level = 0;
5512 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005513
Jonathan Peyton30419822017-05-12 18:01:32 +00005514 /* free the worker threads */
5515 for (f = 1; f < team->t.t_nproc; ++f) {
5516 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5517 __kmp_free_thread(team->t.t_threads[f]);
5518 team->t.t_threads[f] = NULL;
5519 }
5520
5521 /* put the team back in the team pool */
5522 /* TODO limit size of team pool, call reap_team if pool too large */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005523 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005524 __kmp_team_pool = (volatile kmp_team_t *)team;
5525 }
5526
5527 KMP_MB();
5528}
Jim Cownie5e8470a2013-09-27 10:38:44 +00005529
5530/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005531kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5532 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005533
Jonathan Peyton30419822017-05-12 18:01:32 +00005534 KMP_DEBUG_ASSERT(team);
5535 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5536 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5537 KMP_DEBUG_ASSERT(team->t.t_threads);
5538 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005539
Jonathan Peyton30419822017-05-12 18:01:32 +00005540 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005541
Jonathan Peyton30419822017-05-12 18:01:32 +00005542 /* free stuff */
5543 __kmp_free_team_arrays(team);
5544 if (team->t.t_argv != &team->t.t_inline_argv[0])
5545 __kmp_free((void *)team->t.t_argv);
5546 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005547
Jonathan Peyton30419822017-05-12 18:01:32 +00005548 KMP_MB();
5549 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005550}
5551
Jim Cownie5e8470a2013-09-27 10:38:44 +00005552// Free the thread. Don't reap it, just place it on the pool of available
5553// threads.
5554//
5555// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5556// binding for the affinity mechanism to be useful.
5557//
5558// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5559// However, we want to avoid a potential performance problem by always
5560// scanning through the list to find the correct point at which to insert
5561// the thread (potential N**2 behavior). To do this we keep track of the
5562// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5563// With single-level parallelism, threads will always be added to the tail
5564// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5565// parallelism, all bets are off and we may need to scan through the entire
5566// free list.
5567//
5568// This change also has a potentially large performance benefit, for some
5569// applications. Previously, as threads were freed from the hot team, they
5570// would be placed back on the free list in inverse order. If the hot team
5571 // grew back to its original size, then the freed thread would be placed
5572// back on the hot team in reverse order. This could cause bad cache
5573// locality problems on programs where the size of the hot team regularly
5574// grew and shrunk.
5575//
5576 // Now, for single-level parallelism, the OMP tid is always == gtid.
Jonathan Peyton30419822017-05-12 18:01:32 +00005577void __kmp_free_thread(kmp_info_t *this_th) {
5578 int gtid;
5579 kmp_info_t **scan;
Jonathan Peytonf4392462017-07-27 20:58:41 +00005580 kmp_root_t *root = this_th->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005581
Jonathan Peyton30419822017-05-12 18:01:32 +00005582 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5583 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005584
Jonathan Peyton30419822017-05-12 18:01:32 +00005585 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005586
Jonathan Peyton30419822017-05-12 18:01:32 +00005587 // When moving thread to pool, switch thread to wait on own b_go flag, and
5588 // uninitialized (NULL team).
5589 int b;
5590 kmp_balign_t *balign = this_th->th.th_bar;
5591 for (b = 0; b < bs_last_barrier; ++b) {
5592 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5593 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5594 balign[b].bb.team = NULL;
5595 balign[b].bb.leaf_kids = 0;
5596 }
5597 this_th->th.th_task_state = 0;
Andrey Churbanov3336aa02018-03-19 18:05:15 +00005598 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
Jonathan Peyton30419822017-05-12 18:01:32 +00005599
5600 /* put thread back on the free pool */
5601 TCW_PTR(this_th->th.th_team, NULL);
5602 TCW_PTR(this_th->th.th_root, NULL);
5603 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5604
Jonathan Peytonbff8ded2018-01-10 18:24:09 +00005605 /* If the implicit task assigned to this thread can be used by other threads
5606 * -> multiple threads can share the data and try to free the task at
5607 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
5608 * with higher probability when the hot team is disabled but can occur even when
5609 * the hot team is enabled */
5610 __kmp_free_implicit_task(this_th);
5611 this_th->th.th_current_task = NULL;
5612
Jonathan Peyton30419822017-05-12 18:01:32 +00005613 // If the __kmp_thread_pool_insert_pt is already past the new insert
5614 // point, then we need to re-scan the entire list.
5615 gtid = this_th->th.th_info.ds.ds_gtid;
5616 if (__kmp_thread_pool_insert_pt != NULL) {
5617 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5618 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5619 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005620 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005621 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005622
Jonathan Peyton30419822017-05-12 18:01:32 +00005623 // Scan down the list to find the place to insert the thread.
5624 // scan is the address of a link in the list, possibly the address of
5625 // __kmp_thread_pool itself.
5626 //
5627 // In the absence of nested parallelism, the for loop will have 0 iterations.
5628 if (__kmp_thread_pool_insert_pt != NULL) {
5629 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5630 } else {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005631 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005632 }
5633 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5634 scan = &((*scan)->th.th_next_pool))
5635 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005636
Jonathan Peyton30419822017-05-12 18:01:32 +00005637 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5638 // to its address.
5639 TCW_PTR(this_th->th.th_next_pool, *scan);
5640 __kmp_thread_pool_insert_pt = *scan = this_th;
5641 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5642 (this_th->th.th_info.ds.ds_gtid <
5643 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5644 TCW_4(this_th->th.th_in_pool, TRUE);
5645 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005646
Jonathan Peyton30419822017-05-12 18:01:32 +00005647 TCW_4(__kmp_nth, __kmp_nth - 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00005648 root->r.r_cg_nthreads--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005649
5650#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005651 /* Adjust blocktime back to user setting or default if necessary */
5652 /* Middle initialization might never have occurred */
5653 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5654 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5655 if (__kmp_nth <= __kmp_avail_proc) {
5656 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005657 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005658 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005659#endif /* KMP_ADJUST_BLOCKTIME */
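  /* In other words: while the process is oversubscribed the library forces a
     zero blocktime; once the remaining thread count fits within the available
     processors again (and the user never set a blocktime explicitly), idle
     threads may go back to spin-waiting for the normal blocktime interval. */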
5660
Jonathan Peyton30419822017-05-12 18:01:32 +00005661 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005662}
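// A minimal sketch (not part of the runtime) of the sorted-insert pattern used
// above: walk pointer-to-pointer links until the insertion point is found, so
// the list head needs no special casing. Names below are illustrative only.
//
//   struct node_t { int gtid; node_t *next; };
//   static void insert_sorted(node_t **head, node_t *n) {
//     node_t **scan = head;
//     while (*scan != NULL && (*scan)->gtid < n->gtid)
//       scan = &((*scan)->next); // advance by link address, not by node
//     n->next = *scan;           // splice in front of the first larger gtid
//     *scan = n;
//   }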
5663
Jim Cownie5e8470a2013-09-27 10:38:44 +00005664/* ------------------------------------------------------------------------ */
5665
Jonathan Peyton30419822017-05-12 18:01:32 +00005666void *__kmp_launch_thread(kmp_info_t *this_thr) {
5667 int gtid = this_thr->th.th_info.ds.ds_gtid;
5668 /* void *stack_data;*/
5669 kmp_team_t *(*volatile pteam);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005670
Jonathan Peyton30419822017-05-12 18:01:32 +00005671 KMP_MB();
5672 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005673
Jonathan Peyton30419822017-05-12 18:01:32 +00005674 if (__kmp_env_consistency_check) {
5675 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5676 }
5677
5678#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005679 ompt_data_t *thread_data;
5680 if (ompt_enabled.enabled) {
5681 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
Jonathan Peyton3574f282018-10-04 14:57:04 +00005682 *thread_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00005683
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005684 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005685 this_thr->th.ompt_thread_info.wait_id = 0;
Joachim Protze82e94a52017-11-01 10:08:30 +00005686 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5687 if (ompt_enabled.ompt_callback_thread_begin) {
5688 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5689 ompt_thread_worker, thread_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005690 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005691 }
5692#endif
5693
Joachim Protze82e94a52017-11-01 10:08:30 +00005694#if OMPT_SUPPORT
5695 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005696 this_thr->th.ompt_thread_info.state = ompt_state_idle;
Joachim Protze82e94a52017-11-01 10:08:30 +00005697 }
5698#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005699 /* This is the place where threads wait for work */
5700 while (!TCR_4(__kmp_global.g.g_done)) {
5701 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5702 KMP_MB();
5703
5704 /* wait for work to do */
5705 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005706
Jonathan Peyton30419822017-05-12 18:01:32 +00005707 /* No tid yet since not part of a team */
5708 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5709
5710#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005711 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005712 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005713 }
5714#endif
5715
5716 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5717
5718 /* have we been allocated? */
5719 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005720 /* we were just woken up, so run our new task */
5721 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5722 int rc;
5723 KA_TRACE(20,
5724 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5725 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5726 (*pteam)->t.t_pkfn));
5727
5728 updateHWFPControl(*pteam);
5729
5730#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005731 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005732 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00005733 }
5734#endif
5735
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00005736 rc = (*pteam)->t.t_invoke(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00005737 KMP_ASSERT(rc);
5738
Jim Cownie5e8470a2013-09-27 10:38:44 +00005739 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005740 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5741 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5742 (*pteam)->t.t_pkfn));
5743 }
Joachim Protze82e94a52017-11-01 10:08:30 +00005744#if OMPT_SUPPORT
5745 if (ompt_enabled.enabled) {
5746 /* no frame set while outside task */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005747 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00005748
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005749 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005750 }
5751#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00005752 /* join barrier after parallel region */
5753 __kmp_join_barrier(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005754 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005755 }
5756 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005757
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005758#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005759 if (ompt_enabled.ompt_callback_thread_end) {
5760 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005761 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005762#endif
5763
Jonathan Peyton30419822017-05-12 18:01:32 +00005764 this_thr->th.th_task_team = NULL;
5765 /* run the destructors for the threadprivate data for this thread */
5766 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005767
Jonathan Peyton30419822017-05-12 18:01:32 +00005768 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5769 KMP_MB();
5770 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005771}
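// Summarized, the worker lifecycle implemented above is a loop of "sleep at
// the fork barrier, run one microtask, meet at the join barrier". A hedged
// sketch of the control flow (hypothetical helper names, not the runtime's
// API):
//
//   for (;;) {
//     wait_at_fork_barrier();          // park until a team hands us work
//     if (shutting_down())
//       break;
//     if (assigned_to_team()) {
//       if (team_has_task())
//         invoke_microtask();          // execute the parallel-region body
//       wait_at_join_barrier();        // rejoin the team after the region
//     }
//   }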
5772
5773/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005774
Jonathan Peyton30419822017-05-12 18:01:32 +00005775void __kmp_internal_end_dest(void *specific_gtid) {
5776#if KMP_COMPILER_ICC
5777#pragma warning(push)
5778#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5779// significant bits
5780#endif
5781 // Make sure no significant bits are lost
5782 int gtid = (kmp_intptr_t)specific_gtid - 1;
5783#if KMP_COMPILER_ICC
5784#pragma warning(pop)
5785#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005786
Jonathan Peyton30419822017-05-12 18:01:32 +00005787 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5788  /* NOTE: the gtid is stored as gtid+1 in the thread-local storage;
5789 * this is because 0 is reserved for the nothing-stored case */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005790
Jonathan Peyton30419822017-05-12 18:01:32 +00005791 /* josh: One reason for setting the gtid specific data even when it is being
5792 destroyed by pthread is to allow gtid lookup through thread specific data
5793 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5794 that gets executed in the call to __kmp_internal_end_thread, actually
5795 gets the gtid through the thread specific data. Setting it here seems
5796 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5797 to run smoothly.
5798 todo: get rid of this after we remove the dependence on
5799 __kmp_gtid_get_specific */
5800 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5801 __kmp_gtid_set_specific(gtid);
5802#ifdef KMP_TDATA_GTID
5803 __kmp_gtid = gtid;
5804#endif
5805 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005806}
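// The gtid+1 encoding noted above keeps the value 0 free to mean "nothing
// stored" in thread-local storage. A minimal sketch of the round trip, with
// an illustrative key (not the runtime's actual TLS plumbing):
//
//   static pthread_key_t key;
//   static void tls_store_gtid(int gtid) {
//     pthread_setspecific(key, (void *)(intptr_t)(gtid + 1));
//   }
//   static int tls_load_gtid(void) {
//     void *v = pthread_getspecific(key);
//     return v ? (int)(intptr_t)v - 1 : -1; // -1: no gtid recorded
//   }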
5807
Jonathan Peyton99016992015-05-26 17:32:53 +00005808#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005809
Jonathan Peyton30419822017-05-12 18:01:32 +00005810// 2009-09-08 (lev): It looks like the destructor does not work. In simple test
5811// cases destructors work perfectly, but in real libomp.so I have no evidence it
5812// is ever called. However, the -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005813
Jonathan Peyton30419822017-05-12 18:01:32 +00005814__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5815 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005816}
5817
Jonathan Peyton30419822017-05-12 18:01:32 +00005818void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005819
5820#endif
5821
Jonathan Peyton30419822017-05-12 18:01:32 +00005822/* [Windows] josh: when the atexit handler is called, there may still be more
5823 than one thread alive */
5824void __kmp_internal_end_atexit(void) {
5825 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5826 /* [Windows]
5827     josh: ideally, we want to completely shut down the library in this atexit
5828 handler, but stat code that depends on thread specific data for gtid fails
5829 because that data becomes unavailable at some point during the shutdown, so
5830 we call __kmp_internal_end_thread instead. We should eventually remove the
5831 dependency on __kmp_get_specific_gtid in the stat code and use
5832     __kmp_internal_end_library to cleanly shut down the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005833
Jonathan Peyton30419822017-05-12 18:01:32 +00005834 // TODO: Can some of this comment about GVS be removed?
5835 I suspect that the offending stat code is executed when the calling thread
5836 tries to clean up a dead root thread's data structures, resulting in GVS
5837 code trying to close the GVS structures for that thread, but since the stat
5838 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5839     the calling thread is cleaning up itself instead of another thread, it gets
5840     confused. This happens because allowing a thread to unregister and clean up
5841 another thread is a recent modification for addressing an issue.
5842 Based on the current design (20050722), a thread may end up
5843 trying to unregister another thread only if thread death does not trigger
5844 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5845 thread specific data destructor function to detect thread death. For
5846 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5847 is nothing. Thus, the workaround is applicable only for Windows static
5848 stat library. */
5849 __kmp_internal_end_library(-1);
5850#if KMP_OS_WINDOWS
5851 __kmp_close_console();
5852#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005853}
5854
Jonathan Peyton30419822017-05-12 18:01:32 +00005855static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5856 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005857
Jonathan Peyton30419822017-05-12 18:01:32 +00005858 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005859
Jonathan Peyton30419822017-05-12 18:01:32 +00005860 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861
Jonathan Peyton30419822017-05-12 18:01:32 +00005862 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005863
Jonathan Peyton30419822017-05-12 18:01:32 +00005864 if (!is_root) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005865 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5866 /* Assume the threads are at the fork barrier here */
5867 KA_TRACE(
5868 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5869 gtid));
5870 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5871 * (GEH) */
5872 ANNOTATE_HAPPENS_BEFORE(thread);
5873 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5874 __kmp_release_64(&flag);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005875 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005876
Jonathan Peyton30419822017-05-12 18:01:32 +00005877 // Terminate OS thread.
5878 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005879
Jonathan Peyton30419822017-05-12 18:01:32 +00005880 // The thread was killed asynchronously. If it was actively
5881 // spinning in the thread pool, decrement the global count.
5882 //
5883 // There is a small timing hole here - if the worker thread was just waking
5884    // up after sleeping in the pool, had reset its th_active_in_pool flag but
5885 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5886 // the global counter might not get updated.
5887 //
5888 // Currently, this can only happen as the library is unloaded,
5889 // so there are no harmful side effects.
5890 if (thread->th.th_active_in_pool) {
5891 thread->th.th_active_in_pool = FALSE;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005892 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5893 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
Jonathan Peyton30419822017-05-12 18:01:32 +00005894 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005895
Jonathan Peyton30419822017-05-12 18:01:32 +00005896 // Decrement # of [worker] threads in the pool.
5897 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5898 --__kmp_thread_pool_nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005899 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005900
Jonathan Peyton30419822017-05-12 18:01:32 +00005901 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005902
Jonathan Peyton30419822017-05-12 18:01:32 +00005903// Free the fast memory for tasking
5904#if USE_FAST_MEMORY
5905 __kmp_free_fast_memory(thread);
5906#endif /* USE_FAST_MEMORY */
5907
5908 __kmp_suspend_uninitialize_thread(thread);
5909
5910 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5911 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5912
5913 --__kmp_all_nth;
5914// __kmp_nth was decremented when thread is added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005915
5916#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005917 /* Adjust blocktime back to user setting or default if necessary */
5918 /* Middle initialization might never have occurred */
5919 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5920 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5921 if (__kmp_nth <= __kmp_avail_proc) {
5922 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005923 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005924 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005925#endif /* KMP_ADJUST_BLOCKTIME */
5926
Jonathan Peyton30419822017-05-12 18:01:32 +00005927 /* free the memory being used */
5928 if (__kmp_env_consistency_check) {
5929 if (thread->th.th_cons) {
5930 __kmp_free_cons_stack(thread->th.th_cons);
5931 thread->th.th_cons = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005932 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005933 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005934
Jonathan Peyton30419822017-05-12 18:01:32 +00005935 if (thread->th.th_pri_common != NULL) {
5936 __kmp_free(thread->th.th_pri_common);
5937 thread->th.th_pri_common = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005938 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005939
Jonathan Peyton30419822017-05-12 18:01:32 +00005940 if (thread->th.th_task_state_memo_stack != NULL) {
5941 __kmp_free(thread->th.th_task_state_memo_stack);
5942 thread->th.th_task_state_memo_stack = NULL;
5943 }
5944
5945#if KMP_USE_BGET
5946 if (thread->th.th_local.bget_data != NULL) {
5947 __kmp_finalize_bget(thread);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005948 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005949#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005950
Alp Toker98758b02014-03-02 04:12:06 +00005951#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005952 if (thread->th.th_affin_mask != NULL) {
5953 KMP_CPU_FREE(thread->th.th_affin_mask);
5954 thread->th.th_affin_mask = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005955 }
Alp Toker98758b02014-03-02 04:12:06 +00005956#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005957
Jonathan Peytonf6399362018-07-09 17:51:13 +00005958#if KMP_USE_HIER_SCHED
5959 if (thread->th.th_hier_bar_data != NULL) {
5960 __kmp_free(thread->th.th_hier_bar_data);
5961 thread->th.th_hier_bar_data = NULL;
5962 }
5963#endif
5964
Jonathan Peyton30419822017-05-12 18:01:32 +00005965 __kmp_reap_team(thread->th.th_serial_team);
5966 thread->th.th_serial_team = NULL;
5967 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005968
Jonathan Peyton30419822017-05-12 18:01:32 +00005969 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005970
5971} // __kmp_reap_thread
5972
Jonathan Peyton30419822017-05-12 18:01:32 +00005973static void __kmp_internal_end(void) {
5974 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005975
Jonathan Peyton30419822017-05-12 18:01:32 +00005976 /* First, unregister the library */
5977 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005978
Jonathan Peyton30419822017-05-12 18:01:32 +00005979#if KMP_OS_WINDOWS
5980 /* In Win static library, we can't tell when a root actually dies, so we
5981 reclaim the data structures for any root threads that have died but not
5982 unregistered themselves, in order to shut down cleanly.
5983 In Win dynamic library we also can't tell when a thread dies. */
5984 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
5985// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005986#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005987
Jonathan Peyton30419822017-05-12 18:01:32 +00005988 for (i = 0; i < __kmp_threads_capacity; i++)
5989 if (__kmp_root[i])
5990 if (__kmp_root[i]->r.r_active)
5991 break;
5992 KMP_MB(); /* Flush all pending memory write invalidates. */
5993 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5994
5995 if (i < __kmp_threads_capacity) {
5996#if KMP_USE_MONITOR
5997 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5998 KMP_MB(); /* Flush all pending memory write invalidates. */
5999
Jonathan Peyton94a114f2017-10-20 19:30:57 +00006000 // Need to check that monitor was initialized before reaping it. If we are
6001    // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6002 // __kmp_monitor will appear to contain valid data, but it is only valid in
6003 // the parent process, not the child.
Jonathan Peyton30419822017-05-12 18:01:32 +00006004 // New behavior (201008): instead of keying off of the flag
6005 // __kmp_init_parallel, the monitor thread creation is keyed off
6006 // of the new flag __kmp_init_monitor.
6007 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6008 if (TCR_4(__kmp_init_monitor)) {
6009 __kmp_reap_monitor(&__kmp_monitor);
6010 TCW_4(__kmp_init_monitor, 0);
6011 }
6012 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6013 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6014#endif // KMP_USE_MONITOR
6015 } else {
6016/* TODO move this to cleanup code */
6017#ifdef KMP_DEBUG
6018 /* make sure that everything has properly ended */
6019 for (i = 0; i < __kmp_threads_capacity; i++) {
6020 if (__kmp_root[i]) {
6021 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6022 // there can be uber threads alive here
6023 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
6024 }
6025 }
6026#endif
6027
6028 KMP_MB();
6029
6030 // Reap the worker threads.
6031 // This is valid for now, but be careful if threads are reaped sooner.
6032    while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
6033 // Get the next thread from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00006034 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00006035 __kmp_thread_pool = thread->th.th_next_pool;
6036 // Reap it.
6037 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6038 thread->th.th_next_pool = NULL;
6039 thread->th.th_in_pool = FALSE;
6040 __kmp_reap_thread(thread, 0);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006041 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006042 __kmp_thread_pool_insert_pt = NULL;
6043
6044 // Reap teams.
6045 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
6046 // Get the next team from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00006047 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00006048 __kmp_team_pool = team->t.t_next_pool;
6049 // Reap it.
6050 team->t.t_next_pool = NULL;
6051 __kmp_reap_team(team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006052 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006053
6054 __kmp_reap_task_teams();
6055
Jonathan Peytona764af62018-07-19 19:17:00 +00006056#if KMP_OS_UNIX
6057 // Threads that are not reaped should not access any resources since they
6058 // are going to be deallocated soon, so the shutdown sequence should wait
6059 // until all threads either exit the final spin-waiting loop or begin
6060 // sleeping after the given blocktime.
6061 for (i = 0; i < __kmp_threads_capacity; i++) {
6062 kmp_info_t *thr = __kmp_threads[i];
6063 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6064 KMP_CPU_PAUSE();
6065 }
6066#endif
6067
Jonathan Peyton30419822017-05-12 18:01:32 +00006068 for (i = 0; i < __kmp_threads_capacity; ++i) {
6069 // TBD: Add some checking...
6070 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6071 }
6072
6073 /* Make sure all threadprivate destructors get run by joining with all
6074 worker threads before resetting this flag */
6075 TCW_SYNC_4(__kmp_init_common, FALSE);
6076
6077 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6078 KMP_MB();
6079
6080#if KMP_USE_MONITOR
6081 // See note above: One of the possible fixes for CQ138434 / CQ140126
6082 //
6083 // FIXME: push both code fragments down and CSE them?
6084 // push them into __kmp_cleanup() ?
6085 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6086 if (TCR_4(__kmp_init_monitor)) {
6087 __kmp_reap_monitor(&__kmp_monitor);
6088 TCW_4(__kmp_init_monitor, 0);
6089 }
6090 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6091 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6092#endif
6093 } /* else !__kmp_global.t_active */
6094 TCW_4(__kmp_init_gtid, FALSE);
6095 KMP_MB(); /* Flush all pending memory write invalidates. */
6096
6097 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006098#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006099 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006100#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006101}
6102
Jonathan Peyton30419822017-05-12 18:01:32 +00006103void __kmp_internal_end_library(int gtid_req) {
6104 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6105 /* this shouldn't be a race condition because __kmp_internal_end() is the
6106 only place to clear __kmp_serial_init */
6107 /* we'll check this later too, after we get the lock */
6108 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6109  // redundant, because the next check will work in any case.
6110 if (__kmp_global.g.g_abort) {
6111 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6112 /* TODO abort? */
6113 return;
6114 }
6115 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6116 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6117 return;
6118 }
6119
6120 KMP_MB(); /* Flush all pending memory write invalidates. */
6121
6122 /* find out who we are and what we should do */
6123 {
6124 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6125 KA_TRACE(
6126 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6127 if (gtid == KMP_GTID_SHUTDOWN) {
6128 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6129 "already shutdown\n"));
6130 return;
6131 } else if (gtid == KMP_GTID_MONITOR) {
6132 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6133 "registered, or system shutdown\n"));
6134 return;
6135 } else if (gtid == KMP_GTID_DNE) {
6136 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6137 "shutdown\n"));
6138 /* we don't know who we are, but we may still shutdown the library */
6139 } else if (KMP_UBER_GTID(gtid)) {
6140 /* unregister ourselves as an uber thread. gtid is no longer valid */
6141 if (__kmp_root[gtid]->r.r_active) {
6142 __kmp_global.g.g_abort = -1;
6143 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6144 KA_TRACE(10,
6145 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6146 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006147 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006148 } else {
6149 KA_TRACE(
6150 10,
6151 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6152 __kmp_unregister_root_current_thread(gtid);
6153 }
6154 } else {
6155/* worker threads may call this function through the atexit handler, if they
6156 * call exit() */
6157/* For now, skip the usual subsequent processing and just dump the debug buffer.
6158 TODO: do a thorough shutdown instead */
6159#ifdef DUMP_DEBUG_ON_EXIT
6160 if (__kmp_debug_buf)
6161 __kmp_dump_debug_buffer();
6162#endif
6163 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006164 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006165 }
6166 /* synchronize the termination process */
6167 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006168
Jonathan Peyton30419822017-05-12 18:01:32 +00006169 /* have we already finished */
6170 if (__kmp_global.g.g_abort) {
6171 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6172 /* TODO abort? */
6173 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6174 return;
6175 }
6176 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6177 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6178 return;
6179 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006180
Jonathan Peyton30419822017-05-12 18:01:32 +00006181 /* We need this lock to enforce mutex between this reading of
6182 __kmp_threads_capacity and the writing by __kmp_register_root.
6183 Alternatively, we can use a counter of roots that is atomically updated by
6184 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6185 __kmp_internal_end_*. */
6186 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006187
Jonathan Peyton30419822017-05-12 18:01:32 +00006188 /* now we can safely conduct the actual termination */
6189 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006190
Jonathan Peyton30419822017-05-12 18:01:32 +00006191 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6192 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006193
Jonathan Peyton30419822017-05-12 18:01:32 +00006194 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006195
Jonathan Peyton30419822017-05-12 18:01:32 +00006196#ifdef DUMP_DEBUG_ON_EXIT
6197 if (__kmp_debug_buf)
6198 __kmp_dump_debug_buffer();
6199#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006200
Jonathan Peyton30419822017-05-12 18:01:32 +00006201#if KMP_OS_WINDOWS
6202 __kmp_close_console();
6203#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006204
Jonathan Peyton30419822017-05-12 18:01:32 +00006205 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006206
6207} // __kmp_internal_end_library
6208
Jonathan Peyton30419822017-05-12 18:01:32 +00006209void __kmp_internal_end_thread(int gtid_req) {
6210 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006211
Jonathan Peyton30419822017-05-12 18:01:32 +00006212 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6213 /* this shouldn't be a race condition because __kmp_internal_end() is the
6214 * only place to clear __kmp_serial_init */
6215 /* we'll check this later too, after we get the lock */
6216 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6217 // redundant, because the next check will work in any case.
6218 if (__kmp_global.g.g_abort) {
6219 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6220 /* TODO abort? */
6221 return;
6222 }
6223 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6224 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6225 return;
6226 }
6227
6228 KMP_MB(); /* Flush all pending memory write invalidates. */
6229
6230 /* find out who we are and what we should do */
6231 {
6232 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6233 KA_TRACE(10,
6234 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6235 if (gtid == KMP_GTID_SHUTDOWN) {
6236 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6237 "already shutdown\n"));
6238 return;
6239 } else if (gtid == KMP_GTID_MONITOR) {
6240 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6241 "registered, or system shutdown\n"));
6242 return;
6243 } else if (gtid == KMP_GTID_DNE) {
6244 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6245 "shutdown\n"));
6246 return;
6247 /* we don't know who we are */
6248 } else if (KMP_UBER_GTID(gtid)) {
6249 /* unregister ourselves as an uber thread. gtid is no longer valid */
6250 if (__kmp_root[gtid]->r.r_active) {
6251 __kmp_global.g.g_abort = -1;
6252 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6253 KA_TRACE(10,
6254 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6255 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006256 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006257 } else {
6258 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6259 gtid));
6260 __kmp_unregister_root_current_thread(gtid);
6261 }
6262 } else {
6263 /* just a worker thread, let's leave */
6264 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6265
6266 if (gtid >= 0) {
6267 __kmp_threads[gtid]->th.th_task_team = NULL;
6268 }
6269
6270 KA_TRACE(10,
6271 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6272 gtid));
6273 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006274 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006275 }
Jonathan Peyton8b3842f2018-10-05 17:59:39 +00006276#if KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00006277 // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber
6278 // thread, because we will better shutdown later in the library destructor.
6279 // The reason of this change is performance problem when non-openmp thread in
6280 // a loop forks and joins many openmp threads. We can save a lot of time
6281 // keeping worker threads alive until the program shutdown.
6282 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6283 // and Windows(DPD200287443) that occurs when using critical sections from
6284 // foreign threads.
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00006285 if (__kmp_pause_status != kmp_hard_paused) {
6286 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6287 return;
6288 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006289#endif
6290 /* synchronize the termination process */
6291 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006292
Jonathan Peyton30419822017-05-12 18:01:32 +00006293 /* have we already finished */
6294 if (__kmp_global.g.g_abort) {
6295 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6296 /* TODO abort? */
6297 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6298 return;
6299 }
6300 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6301 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6302 return;
6303 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006304
Jonathan Peyton30419822017-05-12 18:01:32 +00006305 /* We need this lock to enforce mutex between this reading of
6306 __kmp_threads_capacity and the writing by __kmp_register_root.
6307 Alternatively, we can use a counter of roots that is atomically updated by
6308 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6309 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006310
Jonathan Peyton30419822017-05-12 18:01:32 +00006311 /* should we finish the run-time? are all siblings done? */
6312 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006313
Jonathan Peyton30419822017-05-12 18:01:32 +00006314 for (i = 0; i < __kmp_threads_capacity; ++i) {
6315 if (KMP_UBER_GTID(i)) {
6316 KA_TRACE(
6317 10,
6318 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6319 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6320 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6321 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006322 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006323 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006324
Jonathan Peyton30419822017-05-12 18:01:32 +00006325 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006326
Jonathan Peyton30419822017-05-12 18:01:32 +00006327 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006328
Jonathan Peyton30419822017-05-12 18:01:32 +00006329 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6330 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006331
Jonathan Peyton30419822017-05-12 18:01:32 +00006332 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006333
Jonathan Peyton30419822017-05-12 18:01:32 +00006334#ifdef DUMP_DEBUG_ON_EXIT
6335 if (__kmp_debug_buf)
6336 __kmp_dump_debug_buffer();
6337#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006338} // __kmp_internal_end_thread
6339
Jonathan Peyton30419822017-05-12 18:01:32 +00006340// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006341// Library registration stuff.
6342
Jonathan Peyton30419822017-05-12 18:01:32 +00006343static long __kmp_registration_flag = 0;
6344// Random value used to indicate library initialization.
6345static char *__kmp_registration_str = NULL;
6346// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006347
Jonathan Peyton30419822017-05-12 18:01:32 +00006348static inline char *__kmp_reg_status_name() {
6349 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6350 each thread. If registration and unregistration go in different threads
6351 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6352     env var cannot be found, because the name will contain a different pid. */
6353 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006354} // __kmp_reg_status_get
6355
Jonathan Peyton30419822017-05-12 18:01:32 +00006356void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006357
Jonathan Peyton30419822017-05-12 18:01:32 +00006358 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6359 int done = 0;
6360 union {
6361 double dtime;
6362 long ltime;
6363 } time;
6364#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6365 __kmp_initialize_system_tick();
6366#endif
6367 __kmp_read_system_time(&time.dtime);
6368 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6369 __kmp_registration_str =
6370 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6371 __kmp_registration_flag, KMP_LIBRARY_FILE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006372
Jonathan Peyton30419822017-05-12 18:01:32 +00006373 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6374 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006375
Jonathan Peyton30419822017-05-12 18:01:32 +00006376 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006377
Jonathan Peyton30419822017-05-12 18:01:32 +00006378 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006379
Jonathan Peyton30419822017-05-12 18:01:32 +00006380    // Set the environment variable, but do not overwrite it if it already exists.
6381 __kmp_env_set(name, __kmp_registration_str, 0);
6382    // Check whether the variable was actually written.
6383 value = __kmp_env_get(name);
6384 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006385
Jonathan Peyton30419822017-05-12 18:01:32 +00006386 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006387
Jonathan Peyton30419822017-05-12 18:01:32 +00006388 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006389
Jonathan Peyton30419822017-05-12 18:01:32 +00006390 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6391      // Check whether it is alive or dead.
6392 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6393 char *tail = value;
6394 char *flag_addr_str = NULL;
6395 char *flag_val_str = NULL;
6396 char const *file_name = NULL;
6397 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6398 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6399 file_name = tail;
6400 if (tail != NULL) {
6401 long *flag_addr = 0;
6402 long flag_val = 0;
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00006403 KMP_SSCANF(flag_addr_str, "%p", RCAST(void**, &flag_addr));
Jonathan Peyton30419822017-05-12 18:01:32 +00006404 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6405 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6406 // First, check whether environment-encoded address is mapped into
6407 // addr space.
6408 // If so, dereference it to see if it still has the right value.
6409 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6410 neighbor = 1;
6411 } else {
6412 // If not, then we know the other copy of the library is no longer
6413 // running.
6414 neighbor = 2;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006415 }
6416 }
6417 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006418 switch (neighbor) {
6419 case 0: // Cannot parse environment variable -- neighbor status unknown.
6420 // Assume it is the incompatible format of future version of the
6421 // library. Assume the other library is alive.
6422 // WARN( ... ); // TODO: Issue a warning.
6423 file_name = "unknown library";
Joachim Protze0c599c32019-02-04 15:59:42 +00006424 KMP_FALLTHROUGH();
Jonathan Peyton30419822017-05-12 18:01:32 +00006425      // Attention! Falling through to the next case. That's intentional.
6426 case 1: { // Neighbor is alive.
6427 // Check it is allowed.
6428 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6429 if (!__kmp_str_match_true(duplicate_ok)) {
6430 // That's not allowed. Issue fatal error.
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006431 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6432 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006433 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006434 KMP_INTERNAL_FREE(duplicate_ok);
6435 __kmp_duplicate_library_ok = 1;
6436 done = 1; // Exit the loop.
6437 } break;
6438 case 2: { // Neighbor is dead.
6439 // Clear the variable and try to register library again.
6440 __kmp_env_unset(name);
6441 } break;
6442 default: { KMP_DEBUG_ASSERT(0); } break;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006443 }
6444 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006445 KMP_INTERNAL_FREE((void *)value);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006446 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006447 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006448
6449} // func __kmp_register_library_startup
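// For reference, the value stored in __KMP_REGISTERED_LIB_<pid> by the loop
// above has the form "<flag address>-<flag value>-<library file>". A sketch of
// the liveness test expressed in isolation (names here are illustrative, not
// the runtime's API):
//
//   long *addr = 0; long val = 0; char file[256] = "";
//   if (sscanf(value, "%p-%lx-%255s", (void **)&addr, &val, file) == 3 &&
//       address_is_mapped(addr) && *addr == val) {
//     /* the registering copy of the library is still alive in this process */
//   }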
6450
Jonathan Peyton30419822017-05-12 18:01:32 +00006451void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006452
Jonathan Peyton30419822017-05-12 18:01:32 +00006453 char *name = __kmp_reg_status_name();
6454 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006455
Jonathan Peyton30419822017-05-12 18:01:32 +00006456 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6457 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6458 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6459 // Ok, this is our variable. Delete it.
6460 __kmp_env_unset(name);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006461 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006462
Jonathan Peyton30419822017-05-12 18:01:32 +00006463 KMP_INTERNAL_FREE(__kmp_registration_str);
6464 KMP_INTERNAL_FREE(value);
6465 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006466
Jonathan Peyton30419822017-05-12 18:01:32 +00006467 __kmp_registration_flag = 0;
6468 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006469
6470} // __kmp_unregister_library
6471
Jim Cownie5e8470a2013-09-27 10:38:44 +00006472// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006473// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006474
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006475#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006476
Jonathan Peyton30419822017-05-12 18:01:32 +00006477static void __kmp_check_mic_type() {
6478 kmp_cpuid_t cpuid_state = {0};
6479 kmp_cpuid_t *cs_p = &cpuid_state;
6480 __kmp_x86_cpuid(1, 0, cs_p);
6481 // We don't support mic1 at the moment
6482 if ((cs_p->eax & 0xff0) == 0xB10) {
6483 __kmp_mic_type = mic2;
6484 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6485 __kmp_mic_type = mic3;
6486 } else {
6487 __kmp_mic_type = non_mic;
6488 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006489}
6490
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006491#endif /* KMP_MIC_SUPPORTED */
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006492
Jonathan Peyton30419822017-05-12 18:01:32 +00006493static void __kmp_do_serial_initialize(void) {
6494 int i, gtid;
6495 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006496
Jonathan Peyton30419822017-05-12 18:01:32 +00006497 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006498
Jonathan Peyton30419822017-05-12 18:01:32 +00006499 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6500 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6501 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6502 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6503 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006504
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006505#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006506 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006507#endif
6508
Jonathan Peyton30419822017-05-12 18:01:32 +00006509 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006510
Jonathan Peyton30419822017-05-12 18:01:32 +00006511 /* Initialize internal memory allocator */
6512 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006513
Jonathan Peyton30419822017-05-12 18:01:32 +00006514 /* Register the library startup via an environment variable and check to see
6515 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006516
Jonathan Peyton30419822017-05-12 18:01:32 +00006517 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006518
Jonathan Peyton30419822017-05-12 18:01:32 +00006519 /* TODO reinitialization of library */
6520 if (TCR_4(__kmp_global.g.g_done)) {
6521 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6522 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006523
Jonathan Peyton30419822017-05-12 18:01:32 +00006524 __kmp_global.g.g_abort = 0;
6525 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006526
Jonathan Peyton30419822017-05-12 18:01:32 +00006527/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006528#if KMP_USE_ADAPTIVE_LOCKS
6529#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006530 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006531#endif
6532#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006533#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006534 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006535#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006536 __kmp_init_lock(&__kmp_global_lock);
6537 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6538 __kmp_init_lock(&__kmp_debug_lock);
6539 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6540 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6541 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6542 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6543 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6544 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6545 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6546 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6547 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6548 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6549 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6550 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6551 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6552 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6553 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006554#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006555 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006556#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006557 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006558
Jonathan Peyton30419822017-05-12 18:01:32 +00006559 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006560
Jonathan Peyton30419822017-05-12 18:01:32 +00006561 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006562
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006563#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006564 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006565#endif
6566
Jonathan Peyton30419822017-05-12 18:01:32 +00006567// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006568#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006569 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006570#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006571 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006572
Jonathan Peyton30419822017-05-12 18:01:32 +00006573 // From __kmp_init_dflt_team_nth()
6574 /* assume the entire machine will be used */
6575 __kmp_dflt_team_nth_ub = __kmp_xproc;
6576 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6577 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6578 }
6579 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6580 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6581 }
6582 __kmp_max_nth = __kmp_sys_max_nth;
Jonathan Peytonf4392462017-07-27 20:58:41 +00006583 __kmp_cg_max_nth = __kmp_sys_max_nth;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00006584 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
6585 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6586 __kmp_teams_max_nth = __kmp_sys_max_nth;
6587 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006588
Jonathan Peyton30419822017-05-12 18:01:32 +00006589 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6590 // part
6591 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006592#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006593 __kmp_monitor_wakeups =
6594 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6595 __kmp_bt_intervals =
6596 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006597#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006598 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6599 __kmp_library = library_throughput;
6600 // From KMP_SCHEDULE initialization
6601 __kmp_static = kmp_sch_static_balanced;
6602// AC: do not use analytical here, because it is non-monotonous
6603//__kmp_guided = kmp_sch_guided_iterative_chunked;
6604//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6605// need to repeat assignment
6606// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
6607// bit control and barrier method control parts
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006608#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006609#define kmp_reduction_barrier_gather_bb ((int)1)
6610#define kmp_reduction_barrier_release_bb ((int)1)
6611#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6612#define kmp_reduction_barrier_release_pat bp_hyper_bar
6613#endif // KMP_FAST_REDUCTION_BARRIER
6614 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6615 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6616 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6617 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6618 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6619#if KMP_FAST_REDUCTION_BARRIER
6620 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6621 // lin_64 ): hyper,1
6622 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6623 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6624 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6625 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006626 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006627#endif // KMP_FAST_REDUCTION_BARRIER
6628 }
6629#if KMP_FAST_REDUCTION_BARRIER
6630#undef kmp_reduction_barrier_release_pat
6631#undef kmp_reduction_barrier_gather_pat
6632#undef kmp_reduction_barrier_release_bb
6633#undef kmp_reduction_barrier_gather_bb
6634#endif // KMP_FAST_REDUCTION_BARRIER
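// The branch-bit defaults above appear to encode log2 of the barrier tree
// fanout; illustrative arithmetic only:
//
//   branch_bits = 1  ->  fanout = 1 << 1 = 2 children per node (default here)
//   branch_bits = 3  ->  fanout = 1 << 3 = 8 (the KNC plain-gather tuning below)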
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006635#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006636 if (__kmp_mic_type == mic2) { // KNC
6637 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6638 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6639 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6640 1; // forkjoin release
6641 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6642 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6643 }
6644#if KMP_FAST_REDUCTION_BARRIER
6645 if (__kmp_mic_type == mic2) { // KNC
6646 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6647 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6648 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006649#endif // KMP_FAST_REDUCTION_BARRIER
6650#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006651
Jonathan Peyton30419822017-05-12 18:01:32 +00006652// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006653#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006654 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006655#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006656 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006657#endif
6658
Jonathan Peyton30419822017-05-12 18:01:32 +00006659 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6660 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006661
Jonathan Peyton30419822017-05-12 18:01:32 +00006662 __kmp_global.g.g_dynamic = FALSE;
6663 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006664
Jonathan Peyton30419822017-05-12 18:01:32 +00006665 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006666
Jonathan Peyton30419822017-05-12 18:01:32 +00006667// Print all messages in message catalog for testing purposes.
6668#ifdef KMP_DEBUG
6669 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6670 if (__kmp_str_match_true(val)) {
6671 kmp_str_buf_t buffer;
6672 __kmp_str_buf_init(&buffer);
6673 __kmp_i18n_dump_catalog(&buffer);
6674 __kmp_printf("%s", buffer.str);
6675 __kmp_str_buf_free(&buffer);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006676 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006677 __kmp_env_free(&val);
6678#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006679
Jonathan Peyton30419822017-05-12 18:01:32 +00006680 __kmp_threads_capacity =
6681 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6682 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6683 __kmp_tp_capacity = __kmp_default_tp_capacity(
6684 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006685
Jonathan Peyton30419822017-05-12 18:01:32 +00006686 // If the library is shut down properly, both pools must be NULL. Just in
6687 // case, set them to NULL -- some memory may leak, but subsequent code will
6688 // work even if pools are not freed.
6689 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6690 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6691 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6692 __kmp_thread_pool = NULL;
6693 __kmp_thread_pool_insert_pt = NULL;
6694 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006695
Jonathan Peyton30419822017-05-12 18:01:32 +00006696 /* Allocate all of the variable sized records */
6697 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6698 * expandable */
6699 /* Since allocation is cache-aligned, just add extra padding at the end */
6700 size =
6701 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6702 CACHE_LINE;
6703 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6704 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6705 sizeof(kmp_info_t *) * __kmp_threads_capacity);
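  // Layout of the single cache-aligned block allocated above (a sketch; the
  // trailing CACHE_LINE bytes are padding only):
  //
  //   [ kmp_info_t* x capacity ][ kmp_root_t* x capacity ][ padding ]
  //   ^ __kmp_threads            ^ __kmp_root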
Jim Cownie5e8470a2013-09-27 10:38:44 +00006706
Jonathan Peyton30419822017-05-12 18:01:32 +00006707 /* init thread counts */
6708 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6709 0); // Asserts fail if the library is reinitializing and
6710 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6711 __kmp_all_nth = 0;
6712 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006713
Jonathan Peyton30419822017-05-12 18:01:32 +00006714 /* setup the uber master thread and hierarchy */
6715 gtid = __kmp_register_root(TRUE);
6716 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6717 KMP_ASSERT(KMP_UBER_GTID(gtid));
6718 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006719
Jonathan Peyton30419822017-05-12 18:01:32 +00006720 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006721
Jonathan Peyton30419822017-05-12 18:01:32 +00006722 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006723
Jonathan Peyton30419822017-05-12 18:01:32 +00006724#if KMP_OS_UNIX
6725 /* invoke the child fork handler */
6726 __kmp_register_atfork();
6727#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006728
Jonathan Peyton8b3842f2018-10-05 17:59:39 +00006729#if !KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00006730 {
6731 /* Invoke the exit handler when the program finishes, only for static
6732 library. For dynamic library, we already have _fini and DllMain. */
6733 int rc = atexit(__kmp_internal_end_atexit);
6734 if (rc != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006735 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6736 __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006737 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006738 }
6739#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006740
Jonathan Peyton30419822017-05-12 18:01:32 +00006741#if KMP_HANDLE_SIGNALS
6742#if KMP_OS_UNIX
6743 /* NOTE: make sure that this is called before the user installs their own
6744 signal handlers so that the user handlers are called first. this way they
6745 can return false, not call our handler, avoid terminating the library, and
6746 continue execution where they left off. */
6747 __kmp_install_signals(FALSE);
6748#endif /* KMP_OS_UNIX */
6749#if KMP_OS_WINDOWS
6750 __kmp_install_signals(TRUE);
6751#endif /* KMP_OS_WINDOWS */
6752#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006753
Jonathan Peyton30419822017-05-12 18:01:32 +00006754 /* we have finished the serial initialization */
6755 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006756
Jonathan Peyton30419822017-05-12 18:01:32 +00006757 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006758
Jonathan Peyton30419822017-05-12 18:01:32 +00006759 if (__kmp_settings) {
6760 __kmp_env_print();
6761 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006762
6763#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006764 if (__kmp_display_env || __kmp_display_env_verbose) {
6765 __kmp_env_print_2();
6766 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006767#endif // OMP_40_ENABLED
6768
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006769#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006770 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006771#endif
6772
Jonathan Peyton30419822017-05-12 18:01:32 +00006773 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006774
Jonathan Peyton30419822017-05-12 18:01:32 +00006775 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006776}
6777
Jonathan Peyton30419822017-05-12 18:01:32 +00006778void __kmp_serial_initialize(void) {
6779 if (__kmp_init_serial) {
6780 return;
6781 }
6782 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6783 if (__kmp_init_serial) {
6784 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6785 return;
6786 }
6787 __kmp_do_serial_initialize();
6788 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6789}
6790
6791static void __kmp_do_middle_initialize(void) {
6792 int i, j;
6793 int prev_dflt_team_nth;
6794
6795 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006796 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006797 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006798
Jonathan Peyton30419822017-05-12 18:01:32 +00006799 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006800
Jonathan Peyton30419822017-05-12 18:01:32 +00006801 // Save the previous value for the __kmp_dflt_team_nth so that
6802 // we can avoid some reinitialization if it hasn't changed.
6803 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006804
Alp Toker98758b02014-03-02 04:12:06 +00006805#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006806 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6807 // number of cores on the machine.
6808 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006809
Jonathan Peyton30419822017-05-12 18:01:32 +00006810 // Run through the __kmp_threads array and set the affinity mask
6811 // for each root thread that is currently registered with the RTL.
6812 for (i = 0; i < __kmp_threads_capacity; i++) {
6813 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6814 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006815 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006816 }
Alp Toker98758b02014-03-02 04:12:06 +00006817#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006818
Jonathan Peyton30419822017-05-12 18:01:32 +00006819 KMP_ASSERT(__kmp_xproc > 0);
6820 if (__kmp_avail_proc == 0) {
6821 __kmp_avail_proc = __kmp_xproc;
6822 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006823
Jonathan Peyton30419822017-05-12 18:01:32 +00006824 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6825 // correct them now
6826 j = 0;
6827 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6828 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6829 __kmp_avail_proc;
6830 j++;
6831 }
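  /* Editor's example (hypothetical setting, assuming empty entries are stored
     as 0 by the settings parser): with OMP_NUM_THREADS=",,2,3" and
     __kmp_avail_proc == 8, the two empty leading entries become 8, so the
     nested list reads 8,8,2,3 and both __kmp_dflt_team_nth and
     __kmp_dflt_team_nth_ub are set to 8 by this loop. */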
Jim Cownie5e8470a2013-09-27 10:38:44 +00006832
Jonathan Peyton30419822017-05-12 18:01:32 +00006833 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006834#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006835 // Default #threads = #cores
6836 __kmp_dflt_team_nth = __kmp_ncores;
6837 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6838 "__kmp_ncores (%d)\n",
6839 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006840#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006841 // Default #threads = #available OS procs
6842 __kmp_dflt_team_nth = __kmp_avail_proc;
6843 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6844 "__kmp_avail_proc(%d)\n",
6845 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006846#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006847 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006848
Jonathan Peyton30419822017-05-12 18:01:32 +00006849 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6850 __kmp_dflt_team_nth = KMP_MIN_NTH;
6851 }
6852 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6853 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6854 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006855
Jonathan Peyton30419822017-05-12 18:01:32 +00006856 // There's no harm in continuing if the following check fails,
6857 // but it indicates an error in the previous logic.
6858 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006859
Jonathan Peyton30419822017-05-12 18:01:32 +00006860 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6861 // Run through the __kmp_threads array and set the num threads icv for each
6862 // root thread that is currently registered with the RTL (which has not
6863 // already explicitly set its nthreads-var with a call to
6864 // omp_set_num_threads()).
6865 for (i = 0; i < __kmp_threads_capacity; i++) {
6866 kmp_info_t *thread = __kmp_threads[i];
6867 if (thread == NULL)
6868 continue;
6869 if (thread->th.th_current_task->td_icvs.nproc != 0)
6870 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006871
Jonathan Peyton30419822017-05-12 18:01:32 +00006872 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006873 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006874 }
6875 KA_TRACE(
6876 20,
6877 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6878 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006879
6880#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006881 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6882 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6883 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6884 if (__kmp_nth > __kmp_avail_proc) {
6885 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006886 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006887 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006888#endif /* KMP_ADJUST_BLOCKTIME */
6889
Jonathan Peyton30419822017-05-12 18:01:32 +00006890 /* we have finished middle initialization */
6891 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006892
Jonathan Peyton30419822017-05-12 18:01:32 +00006893 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006894}
6895
Jonathan Peyton30419822017-05-12 18:01:32 +00006896void __kmp_middle_initialize(void) {
6897 if (__kmp_init_middle) {
6898 return;
6899 }
6900 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6901 if (__kmp_init_middle) {
6902 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6903 return;
6904 }
6905 __kmp_do_middle_initialize();
6906 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6907}
6908
6909void __kmp_parallel_initialize(void) {
6910 int gtid = __kmp_entry_gtid(); // this might be a new root
6911
6912 /* synchronize parallel initialization (for sibling) */
6913 if (TCR_4(__kmp_init_parallel))
6914 return;
6915 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6916 if (TCR_4(__kmp_init_parallel)) {
6917 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6918 return;
6919 }
6920
6921 /* TODO reinitialization after we have already shut down */
6922 if (TCR_4(__kmp_global.g.g_done)) {
6923 KA_TRACE(
6924 10,
6925 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6926 __kmp_infinite_loop();
6927 }
6928
6929 /* jc: The lock __kmp_initz_lock is already held, so calling
6930 __kmp_serial_initialize would cause a deadlock. So we call
6931 __kmp_do_serial_initialize directly. */
6932 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006933 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006934 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006935
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00006936#if OMP_50_ENABLED
6937 __kmp_resume_if_hard_paused();
6938#endif
6939
Jonathan Peyton30419822017-05-12 18:01:32 +00006940 /* begin initialization */
6941 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6942 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006943
6944#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006945 // Save the FP control regs.
6946 // Worker threads will set theirs to these values at thread startup.
6947 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6948 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6949 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006950#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6951
6952#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00006953#if KMP_HANDLE_SIGNALS
6954 /* must be after __kmp_serial_initialize */
6955 __kmp_install_signals(TRUE);
6956#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006957#endif
6958
Jonathan Peyton30419822017-05-12 18:01:32 +00006959 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006960
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006961#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00006962 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6963 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6964 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006965#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006966 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6967 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6968 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006969#endif
6970
Jonathan Peyton30419822017-05-12 18:01:32 +00006971 if (__kmp_version) {
6972 __kmp_print_version_2();
6973 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006974
Jonathan Peyton30419822017-05-12 18:01:32 +00006975 /* we have finished parallel initialization */
6976 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006977
Jonathan Peyton30419822017-05-12 18:01:32 +00006978 KMP_MB();
6979 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006980
Jonathan Peyton30419822017-05-12 18:01:32 +00006981 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006982}
6983
Jim Cownie5e8470a2013-09-27 10:38:44 +00006984/* ------------------------------------------------------------------------ */
6985
Jonathan Peyton30419822017-05-12 18:01:32 +00006986void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6987 kmp_team_t *team) {
6988 kmp_disp_t *dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006989
Jonathan Peyton30419822017-05-12 18:01:32 +00006990 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006991
Jonathan Peyton30419822017-05-12 18:01:32 +00006992 /* none of the threads have encountered any constructs, yet. */
6993 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006994#if KMP_CACHE_MANAGE
Jonathan Peyton30419822017-05-12 18:01:32 +00006995 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006996#endif /* KMP_CACHE_MANAGE */
Jonathan Peyton30419822017-05-12 18:01:32 +00006997 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6998 KMP_DEBUG_ASSERT(dispatch);
6999 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7000 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7001 // this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007002
Jonathan Peyton30419822017-05-12 18:01:32 +00007003 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007004#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007005 dispatch->th_doacross_buf_idx =
7006 0; /* reset the doacross dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00007007#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007008 if (__kmp_env_consistency_check)
7009 __kmp_push_parallel(gtid, team->t.t_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007010
Jonathan Peyton30419822017-05-12 18:01:32 +00007011 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007012}
7013
Jonathan Peyton30419822017-05-12 18:01:32 +00007014void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7015 kmp_team_t *team) {
7016 if (__kmp_env_consistency_check)
7017 __kmp_pop_parallel(gtid, team->t.t_ident);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00007018
Jonathan Peyton30419822017-05-12 18:01:32 +00007019 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007020}
7021
Jonathan Peyton30419822017-05-12 18:01:32 +00007022int __kmp_invoke_task_func(int gtid) {
7023 int rc;
7024 int tid = __kmp_tid_from_gtid(gtid);
7025 kmp_info_t *this_thr = __kmp_threads[gtid];
7026 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007027
Jonathan Peyton30419822017-05-12 18:01:32 +00007028 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007029#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00007030 if (__itt_stack_caller_create_ptr) {
7031 __kmp_itt_stack_callee_enter(
7032 (__itt_caller)
7033 team->t.t_stack_id); // inform ittnotify about entering user's code
7034 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007035#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007036#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007037 SSC_MARK_INVOKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007038#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007039
7040#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007041 void *dummy;
7042 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00007043 ompt_data_t *my_task_data;
7044 ompt_data_t *my_parallel_data;
7045 int ompt_team_size;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007046
Joachim Protze82e94a52017-11-01 10:08:30 +00007047 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00007048 exit_runtime_p = &(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00007049 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00007050 } else {
7051 exit_runtime_p = &dummy;
7052 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007053
Joachim Protze82e94a52017-11-01 10:08:30 +00007054 my_task_data =
7055 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7056 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7057 if (ompt_enabled.ompt_callback_implicit_task) {
7058 ompt_team_size = team->t.t_nproc;
7059 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7060 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
Joachim Protze2b46d302019-01-15 15:36:53 +00007061 __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00007062 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00007063 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007064#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007065
Jonathan Peyton30419822017-05-12 18:01:32 +00007066 {
7067 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
7068 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
7069 rc =
7070 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7071 tid, (int)team->t.t_argc, (void **)team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007072#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007073 ,
7074 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007075#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007076 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00007077#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007078 *exit_runtime_p = NULL;
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00007079#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007080 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007081
Jim Cownie5e8470a2013-09-27 10:38:44 +00007082#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00007083 if (__itt_stack_caller_create_ptr) {
7084 __kmp_itt_stack_callee_leave(
7085 (__itt_caller)
7086 team->t.t_stack_id); // inform ittnotify about leaving user's code
7087 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007088#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00007089 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007090
Jonathan Peyton30419822017-05-12 18:01:32 +00007091 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007092}
7093
7094#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007095void __kmp_teams_master(int gtid) {
7096 // This routine is called by all master threads in teams construct
7097 kmp_info_t *thr = __kmp_threads[gtid];
7098 kmp_team_t *team = thr->th.th_team;
7099 ident_t *loc = team->t.t_ident;
7100 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7101 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7102 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7103 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7104 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7105// Launch league of teams now, but not let workers execute
7106// (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007107#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007108 SSC_MARK_FORKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007109#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007110 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
Jonathan Peyton30419822017-05-12 18:01:32 +00007111 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7112 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007113#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007114 SSC_MARK_JOINING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007115#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00007116
Jonathan Peyton30419822017-05-12 18:01:32 +00007117 // AC: last parameter "1" eliminates join barrier which won't work because
7118 // worker threads are in a fork barrier waiting for more parallel regions
7119 __kmp_join_call(loc, gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007120#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007121 ,
7122 fork_context_intel
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007123#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007124 ,
7125 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007126}
7127
Jonathan Peyton30419822017-05-12 18:01:32 +00007128int __kmp_invoke_teams_master(int gtid) {
7129 kmp_info_t *this_thr = __kmp_threads[gtid];
7130 kmp_team_t *team = this_thr->th.th_team;
7131#if KMP_DEBUG
7132 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7133 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7134 (void *)__kmp_teams_master);
7135#endif
7136 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7137 __kmp_teams_master(gtid);
7138 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7139 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007140}
7141#endif /* OMP_40_ENABLED */
7142
7143/* This sets the requested number of threads for the next parallel region
Jonathan Peyton30419822017-05-12 18:01:32 +00007144 encountered by this team. Since this should be enclosed in the fork/join
7145 critical section, it should avoid race conditions with asymmetrical nested
7146 parallelism. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007147
Jonathan Peyton30419822017-05-12 18:01:32 +00007148void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7149 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007150
Jonathan Peyton30419822017-05-12 18:01:32 +00007151 if (num_threads > 0)
7152 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007153}
7154
7155#if OMP_40_ENABLED
7156
7157/* This sets the requested number of teams for the teams region and/or
Jonathan Peyton30419822017-05-12 18:01:32 +00007158 the number of threads for the next parallel region encountered. */
7159void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7160 int num_threads) {
7161 kmp_info_t *thr = __kmp_threads[gtid];
7162 KMP_DEBUG_ASSERT(num_teams >= 0);
7163 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007164
Jonathan Peyton30419822017-05-12 18:01:32 +00007165 if (num_teams == 0)
7166 num_teams = 1; // default number of teams is 1.
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007167 if (num_teams > __kmp_teams_max_nth) { // too many teams requested
Jonathan Peyton30419822017-05-12 18:01:32 +00007168 if (!__kmp_reserve_warn) {
7169 __kmp_reserve_warn = 1;
7170 __kmp_msg(kmp_ms_warning,
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007171 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
Jonathan Peyton30419822017-05-12 18:01:32 +00007172 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007173 }
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007174 num_teams = __kmp_teams_max_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007175 }
7176 // Set number of teams (number of threads in the outer "parallel" of the
7177 // teams)
7178 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007179
Jonathan Peyton30419822017-05-12 18:01:32 +00007180 // Remember the number of threads for inner parallel regions
7181 if (num_threads == 0) {
7182 if (!TCR_4(__kmp_init_middle))
7183 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
7184 num_threads = __kmp_avail_proc / num_teams;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007185 if (num_teams * num_threads > __kmp_teams_max_nth) {
Jonathan Peyton30419822017-05-12 18:01:32 +00007186 // adjust num_threads w/o warning as it is not user setting
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007187 num_threads = __kmp_teams_max_nth / num_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007188 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007189 } else {
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007190 if (num_teams * num_threads > __kmp_teams_max_nth) {
7191 int new_threads = __kmp_teams_max_nth / num_teams;
Jonathan Peyton30419822017-05-12 18:01:32 +00007192 if (!__kmp_reserve_warn) { // user asked for too many threads
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007193 __kmp_reserve_warn = 1; // that conflicts with KMP_TEAMS_THREAD_LIMIT
Jonathan Peyton30419822017-05-12 18:01:32 +00007194 __kmp_msg(kmp_ms_warning,
7195 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7196 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7197 }
7198 num_threads = new_threads;
7199 }
7200 }
7201 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007202}
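/* Worked example (editor's sketch; the numeric values below are hypothetical,
   not taken from the source): with __kmp_teams_max_nth == 256 and a call
   __kmp_push_num_teams(loc, gtid, 8, 0) on a machine where
   __kmp_avail_proc == 64, num_threads is derived as 64 / 8 == 8, so the
   encountering thread requests 8 teams with up to 8 threads each. Had the
   caller passed num_threads == 64 instead, 8 * 64 > 256 would trigger the
   CantFormThrTeam warning and num_threads would be clamped to 256 / 8 == 32. */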
7203
Jim Cownie5e8470a2013-09-27 10:38:44 +00007204// Set the proc_bind var to use in the following parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007205void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7206 kmp_info_t *thr = __kmp_threads[gtid];
7207 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007208}
7209
7210#endif /* OMP_40_ENABLED */
7211
7212/* Launch the worker threads into the microtask. */
7213
Jonathan Peyton30419822017-05-12 18:01:32 +00007214void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7215 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007216
7217#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007218 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007219#endif /* KMP_DEBUG */
7220
Jonathan Peyton30419822017-05-12 18:01:32 +00007221 KMP_DEBUG_ASSERT(team);
7222 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7223 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7224 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007225
Jonathan Peyton30419822017-05-12 18:01:32 +00007226 team->t.t_construct = 0; /* no single directives seen yet */
7227 team->t.t_ordered.dt.t_value =
7228 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007229
Jonathan Peyton30419822017-05-12 18:01:32 +00007230 /* Reset the identifiers on the dispatch buffer */
7231 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7232 if (team->t.t_max_nproc > 1) {
7233 int i;
7234 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7235 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007236#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007237 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007238#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007239 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007240 } else {
7241 team->t.t_disp_buffer[0].buffer_index = 0;
7242#if OMP_45_ENABLED
7243 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7244#endif
7245 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007246
Jonathan Peyton30419822017-05-12 18:01:32 +00007247 KMP_MB(); /* Flush all pending memory write invalidates. */
7248 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007249
7250#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007251 for (f = 0; f < team->t.t_nproc; f++) {
7252 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7253 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7254 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007255#endif /* KMP_DEBUG */
7256
Jonathan Peyton30419822017-05-12 18:01:32 +00007257 /* release the worker threads so they may begin working */
7258 __kmp_fork_barrier(gtid, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007259}
7260
Jonathan Peyton30419822017-05-12 18:01:32 +00007261void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7262 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007263
Jonathan Peyton30419822017-05-12 18:01:32 +00007264 KMP_DEBUG_ASSERT(team);
7265 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7266 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7267 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007268
Jonathan Peyton30419822017-05-12 18:01:32 +00007269/* Join barrier after fork */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007270
7271#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007272 if (__kmp_threads[gtid] &&
7273 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7274 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7275 __kmp_threads[gtid]);
7276 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7277 "team->t.t_nproc=%d\n",
7278 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7279 team->t.t_nproc);
7280 __kmp_print_structure();
7281 }
7282 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7283 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007284#endif /* KMP_DEBUG */
7285
Jonathan Peyton30419822017-05-12 18:01:32 +00007286 __kmp_join_barrier(gtid); /* wait for everyone */
Joachim Protze82e94a52017-11-01 10:08:30 +00007287#if OMPT_SUPPORT
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007288 if (ompt_enabled.enabled &&
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00007289 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007290 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7291 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00007292 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Joachim Protze82e94a52017-11-01 10:08:30 +00007293#if OMPT_OPTIONAL
7294 void *codeptr = NULL;
7295 if (KMP_MASTER_TID(ds_tid) &&
7296 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7297 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7298 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7299
7300 if (ompt_enabled.ompt_callback_sync_region_wait) {
7301 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007302 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007303 }
7304 if (ompt_enabled.ompt_callback_sync_region) {
7305 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007306 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007307 }
7308#endif
7309 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7310 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
Joachim Protze2b46d302019-01-15 15:36:53 +00007311 ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze82e94a52017-11-01 10:08:30 +00007312 }
Joachim Protze82e94a52017-11-01 10:08:30 +00007313 }
7314#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007315
Jonathan Peyton30419822017-05-12 18:01:32 +00007316 KMP_MB(); /* Flush all pending memory write invalidates. */
7317 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007318}
7319
Jim Cownie5e8470a2013-09-27 10:38:44 +00007320/* ------------------------------------------------------------------------ */
7321
7322#ifdef USE_LOAD_BALANCE
7323
Jim Cownie5e8470a2013-09-27 10:38:44 +00007324// Return the worker threads actively spinning in the hot team, if we
7325// are at the outermost level of parallelism. Otherwise, return 0.
Jonathan Peyton30419822017-05-12 18:01:32 +00007326static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7327 int i;
7328 int retval;
7329 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007330
Jonathan Peyton30419822017-05-12 18:01:32 +00007331 if (root->r.r_active) {
7332 return 0;
7333 }
7334 hot_team = root->r.r_hot_team;
7335 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7336 return hot_team->t.t_nproc - 1; // Don't count master thread
7337 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007338
Jonathan Peyton30419822017-05-12 18:01:32 +00007339 // Skip the master thread - it is accounted for elsewhere.
7340 retval = 0;
7341 for (i = 1; i < hot_team->t.t_nproc; i++) {
7342 if (hot_team->t.t_threads[i]->th.th_active) {
7343 retval++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007344 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007345 }
7346 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007347}
7348
Jim Cownie5e8470a2013-09-27 10:38:44 +00007349// Perform an automatic adjustment to the number of
7350// threads used by the next parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007351static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7352 int retval;
7353 int pool_active;
7354 int hot_team_active;
7355 int team_curr_active;
7356 int system_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007357
Jonathan Peyton30419822017-05-12 18:01:32 +00007358 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7359 set_nproc));
7360 KMP_DEBUG_ASSERT(root);
7361 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7362 ->th.th_current_task->td_icvs.dynamic == TRUE);
7363 KMP_DEBUG_ASSERT(set_nproc > 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007364
Jonathan Peyton30419822017-05-12 18:01:32 +00007365 if (set_nproc == 1) {
7366 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
7367 return 1;
7368 }
7369
7370 // Threads that are active in the thread pool, active in the hot team for
7371 // this particular root (if we are at the outermost parallel level), and the
7372 // currently executing thread (to become the master) are available to add to
7373 // the new team, but they currently contribute to the system load and must be
7374 // accounted for.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00007375 pool_active = __kmp_thread_pool_active_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007376 hot_team_active = __kmp_active_hot_team_nproc(root);
7377 team_curr_active = pool_active + hot_team_active + 1;
7378
7379 // Check the system load.
7380 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7381 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
7382 "hot team active = %d\n",
7383 system_active, pool_active, hot_team_active));
7384
7385 if (system_active < 0) {
7386 // There was an error reading the necessary info from /proc, so use the
7387 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
7388 // = dynamic_thread_limit, we shouldn't wind up getting back here.
7389 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7390 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7391
7392 // Make this call behave like the thread limit algorithm.
7393 retval = __kmp_avail_proc - __kmp_nth +
7394 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7395 if (retval > set_nproc) {
7396 retval = set_nproc;
7397 }
7398 if (retval < KMP_MIN_NTH) {
7399 retval = KMP_MIN_NTH;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007400 }
7401
Jonathan Peyton30419822017-05-12 18:01:32 +00007402 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7403 retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007404 return retval;
Jonathan Peyton30419822017-05-12 18:01:32 +00007405 }
7406
7407 // There is a slight delay in the load balance algorithm in detecting new
7408 // running procs. The real system load at this instant should be at least as
7409 // large as the number of active OMP threads available to add to the team.
7410 if (system_active < team_curr_active) {
7411 system_active = team_curr_active;
7412 }
7413 retval = __kmp_avail_proc - system_active + team_curr_active;
7414 if (retval > set_nproc) {
7415 retval = set_nproc;
7416 }
7417 if (retval < KMP_MIN_NTH) {
7418 retval = KMP_MIN_NTH;
7419 }
7420
7421 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7422 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007423} // __kmp_load_balance_nproc()
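/* Worked example (editor's sketch; all values are hypothetical): with
   __kmp_avail_proc == 8, two idle workers in the thread pool, a hot team whose
   three workers are all active, and the encountering master thread,
   team_curr_active == 2 + 3 + 1 == 6. If __kmp_get_load_balance() reports
   system_active == 7, the suggested team size is 8 - 7 + 6 == 7, which is then
   clamped to set_nproc and raised to at least KMP_MIN_NTH. A negative
   system_active instead switches the mode to dynamic_thread_limit. */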
7424
7425#endif /* USE_LOAD_BALANCE */
7426
Jim Cownie5e8470a2013-09-27 10:38:44 +00007427/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007428
7429/* NOTE: this is called with the __kmp_init_lock held */
Jonathan Peyton30419822017-05-12 18:01:32 +00007430void __kmp_cleanup(void) {
7431 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007432
Jonathan Peyton30419822017-05-12 18:01:32 +00007433 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007434
Jonathan Peyton30419822017-05-12 18:01:32 +00007435 if (TCR_4(__kmp_init_parallel)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007436#if KMP_HANDLE_SIGNALS
Jonathan Peyton30419822017-05-12 18:01:32 +00007437 __kmp_remove_signals();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007438#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007439 TCW_4(__kmp_init_parallel, FALSE);
7440 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007441
Jonathan Peyton30419822017-05-12 18:01:32 +00007442 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007443#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007444 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007445#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton30419822017-05-12 18:01:32 +00007446 __kmp_cleanup_hierarchy();
7447 TCW_4(__kmp_init_middle, FALSE);
7448 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007449
Jonathan Peyton30419822017-05-12 18:01:32 +00007450 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007451
Jonathan Peyton30419822017-05-12 18:01:32 +00007452 if (__kmp_init_serial) {
7453 __kmp_runtime_destroy();
7454 __kmp_init_serial = FALSE;
7455 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007456
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00007457 __kmp_cleanup_threadprivate_caches();
7458
Jonathan Peyton30419822017-05-12 18:01:32 +00007459 for (f = 0; f < __kmp_threads_capacity; f++) {
7460 if (__kmp_root[f] != NULL) {
7461 __kmp_free(__kmp_root[f]);
7462 __kmp_root[f] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007463 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007464 }
7465 __kmp_free(__kmp_threads);
7466 // __kmp_threads and __kmp_root were allocated at once, as single block, so
7467 // there is no need in freeing __kmp_root.
7468 __kmp_threads = NULL;
7469 __kmp_root = NULL;
7470 __kmp_threads_capacity = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007471
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007472#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00007473 __kmp_cleanup_indirect_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007474#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007475 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007476#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007477
Jonathan Peyton30419822017-05-12 18:01:32 +00007478#if KMP_AFFINITY_SUPPORTED
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00007479 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
Jonathan Peyton30419822017-05-12 18:01:32 +00007480 __kmp_cpuinfo_file = NULL;
7481#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007482
Jonathan Peyton30419822017-05-12 18:01:32 +00007483#if KMP_USE_ADAPTIVE_LOCKS
7484#if KMP_DEBUG_ADAPTIVE_LOCKS
7485 __kmp_print_speculative_stats();
7486#endif
7487#endif
7488 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7489 __kmp_nested_nth.nth = NULL;
7490 __kmp_nested_nth.size = 0;
7491 __kmp_nested_nth.used = 0;
7492 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7493 __kmp_nested_proc_bind.bind_types = NULL;
7494 __kmp_nested_proc_bind.size = 0;
7495 __kmp_nested_proc_bind.used = 0;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00007496#if OMP_50_ENABLED
7497 if (__kmp_affinity_format) {
7498 KMP_INTERNAL_FREE(__kmp_affinity_format);
7499 __kmp_affinity_format = NULL;
7500 }
7501#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007502
Jonathan Peyton30419822017-05-12 18:01:32 +00007503 __kmp_i18n_catclose();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007504
Jonathan Peytonf6399362018-07-09 17:51:13 +00007505#if KMP_USE_HIER_SCHED
7506 __kmp_hier_scheds.deallocate();
7507#endif
7508
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007509#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007510 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007511#endif
7512
Jonathan Peyton30419822017-05-12 18:01:32 +00007513 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007514}
7515
7516/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007517
7518int __kmp_ignore_mppbeg(void) {
7519 char *env;
7520
7521 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
7522 if (__kmp_str_match_false(env))
7523 return FALSE;
7524 }
7525 // By default __kmpc_begin() is no-op.
7526 return TRUE;
7527}
7528
7529int __kmp_ignore_mppend(void) {
7530 char *env;
7531
7532 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
7533 if (__kmp_str_match_false(env))
7534 return FALSE;
7535 }
7536 // By default __kmpc_end() is no-op.
7537 return TRUE;
7538}
7539
7540void __kmp_internal_begin(void) {
7541 int gtid;
7542 kmp_root_t *root;
7543
7544 /* this is a very important step as it will register new sibling threads
7545 and assign these new uber threads a new gtid */
7546 gtid = __kmp_entry_gtid();
7547 root = __kmp_threads[gtid]->th.th_root;
7548 KMP_ASSERT(KMP_UBER_GTID(gtid));
7549
7550 if (root->r.r_begin)
7551 return;
7552 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7553 if (root->r.r_begin) {
7554 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7555 return;
7556 }
7557
7558 root->r.r_begin = TRUE;
7559
7560 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7561}
7562
Jim Cownie5e8470a2013-09-27 10:38:44 +00007563/* ------------------------------------------------------------------------ */
7564
Jonathan Peyton30419822017-05-12 18:01:32 +00007565void __kmp_user_set_library(enum library_type arg) {
7566 int gtid;
7567 kmp_root_t *root;
7568 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007569
Jonathan Peyton30419822017-05-12 18:01:32 +00007570 /* first, make sure we are initialized so we can get our gtid */
7571
7572 gtid = __kmp_entry_gtid();
7573 thread = __kmp_threads[gtid];
7574
7575 root = thread->th.th_root;
7576
7577 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7578 library_serial));
7579 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
7580 thread */
7581 KMP_WARNING(SetLibraryIncorrectCall);
7582 return;
7583 }
7584
7585 switch (arg) {
7586 case library_serial:
7587 thread->th.th_set_nproc = 0;
7588 set__nproc(thread, 1);
7589 break;
7590 case library_turnaround:
7591 thread->th.th_set_nproc = 0;
7592 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7593 : __kmp_dflt_team_nth_ub);
7594 break;
7595 case library_throughput:
7596 thread->th.th_set_nproc = 0;
7597 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7598 : __kmp_dflt_team_nth_ub);
7599 break;
7600 default:
7601 KMP_FATAL(UnknownLibraryType, arg);
7602 }
7603
7604 __kmp_aux_set_library(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007605}
7606
Jonathan Peyton30419822017-05-12 18:01:32 +00007607void __kmp_aux_set_stacksize(size_t arg) {
7608 if (!__kmp_init_serial)
7609 __kmp_serial_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007610
7611#if KMP_OS_DARWIN
Jonathan Peyton30419822017-05-12 18:01:32 +00007612 if (arg & (0x1000 - 1)) {
7613 arg &= ~(0x1000 - 1);
7614 if (arg + 0x1000) /* check for overflow if we round up */
7615 arg += 0x1000;
7616 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007617#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007618 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007619
Jonathan Peyton30419822017-05-12 18:01:32 +00007620 /* only change the default stacksize before the first parallel region */
7621 if (!TCR_4(__kmp_init_parallel)) {
7622 size_t value = arg; /* argument is in bytes */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007623
Jonathan Peyton30419822017-05-12 18:01:32 +00007624 if (value < __kmp_sys_min_stksize)
7625 value = __kmp_sys_min_stksize;
7626 else if (value > KMP_MAX_STKSIZE)
7627 value = KMP_MAX_STKSIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007628
Jonathan Peyton30419822017-05-12 18:01:32 +00007629 __kmp_stksize = value;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007630
Jonathan Peyton30419822017-05-12 18:01:32 +00007631 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7632 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007633
Jonathan Peyton30419822017-05-12 18:01:32 +00007634 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007635}
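/* Worked example (editor's sketch, hypothetical value): on KMP_OS_DARWIN a
   requested stack size of 1000000 bytes is rounded down to the 4 KiB page
   boundary (999424) and then bumped up one page to 1003520. If no parallel
   region has started yet, the value is clamped into
   [__kmp_sys_min_stksize, KMP_MAX_STKSIZE] and stored in __kmp_stksize. */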
7636
7637/* set the behaviour of the runtime library */
7638/* TODO this can cause some odd behaviour with sibling parallelism... */
Jonathan Peyton30419822017-05-12 18:01:32 +00007639void __kmp_aux_set_library(enum library_type arg) {
7640 __kmp_library = arg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007641
Jonathan Peyton30419822017-05-12 18:01:32 +00007642 switch (__kmp_library) {
7643 case library_serial: {
7644 KMP_INFORM(LibraryIsSerial);
7645 (void)__kmp_change_library(TRUE);
7646 } break;
7647 case library_turnaround:
7648 (void)__kmp_change_library(TRUE);
7649 break;
7650 case library_throughput:
7651 (void)__kmp_change_library(FALSE);
7652 break;
7653 default:
7654 KMP_FATAL(UnknownLibraryType, arg);
7655 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007656}
7657
Jonathan Peyton6d88e042018-12-13 23:14:24 +00007658/* Getting team information common for all team API */
7659// Returns NULL if not in teams construct
7660static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
7661 kmp_info_t *thr = __kmp_entry_thread();
7662 teams_serialized = 0;
7663 if (thr->th.th_teams_microtask) {
7664 kmp_team_t *team = thr->th.th_team;
7665 int tlevel = thr->th.th_teams_level; // the level of the teams construct
7666 int ii = team->t.t_level;
7667 teams_serialized = team->t.t_serialized;
7668 int level = tlevel + 1;
7669 KMP_DEBUG_ASSERT(ii >= tlevel);
7670 while (ii > level) {
7671 for (teams_serialized = team->t.t_serialized;
7672 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
7673 }
7674 if (team->t.t_serialized && (!teams_serialized)) {
7675 team = team->t.t_parent;
7676 continue;
7677 }
7678 if (ii > level) {
7679 team = team->t.t_parent;
7680 ii--;
7681 }
7682 }
7683 return team;
7684 }
7685 return NULL;
7686}
7687
7688int __kmp_aux_get_team_num() {
7689 int serialized;
7690 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
7691 if (team) {
7692 if (serialized > 1) {
7693 return 0; // teams region is serialized (1 team of 1 thread).
7694 } else {
7695 return team->t.t_master_tid;
7696 }
7697 }
7698 return 0;
7699}
7700
7701int __kmp_aux_get_num_teams() {
7702 int serialized;
7703 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
7704 if (team) {
7705 if (serialized > 1) {
7706 return 1;
7707 } else {
7708 return team->t.t_parent->t.t_nproc;
7709 }
7710 }
7711 return 1;
7712}
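/* Editor's note (illustrative, hypothetical scenario): inside a
   "#pragma omp teams num_teams(4)" construct, the master thread of the third
   team would see __kmp_aux_get_team_num() == 2 (its t_master_tid in the
   enclosing team) and __kmp_aux_get_num_teams() == 4 (the parent team's
   t_nproc); outside any teams construct, or when the teams region is
   serialized, the fallbacks of 0 and 1 are returned. */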
7713
7714/* ------------------------------------------------------------------------ */
7715
7716#if OMP_50_ENABLED
7717/*
7718 * Affinity Format Parser
7719 *
7720 * Field is in form of: %[[[0].]size]type
7721 * % and type are required (%% means print a literal '%')
7722 * type is either single char or long name surrounded by {},
7723 * e.g., N or {num_threads}
7724 * 0 => leading zeros
7725 * . => right justified when size is specified
7726 * by default output is left justified
7727 * size is the *minimum* field length
7728 * All other characters are printed as is
7729 *
7730 * Available field types:
7731 * t {team_num} - omp_get_team_num()
7732 * T {num_teams} - omp_get_num_teams()
7733 * L {nesting_level} - omp_get_level()
7734 * n {thread_num} - omp_get_thread_num()
7735 * N {num_threads} - omp_get_num_threads()
7736 * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
7737 * H {host} - name of host machine
7738 * P {process_id} - process id; i {native_thread_id} - native thread id
7739 * A {thread_affinity} - affinity mask as comma separated integers/ranges
7740 *
7741 * Implementation-specific field types can be added
7742 * If a type is unknown, print "undefined"
7743*/
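/* Example (editor's illustration; the expanded values are hypothetical): the
   format string "OMP: host=%H tid=%n/%N level=%L" could expand to something
   like "OMP: host=node17 tid=3/8 level=1" for one thread, and
   "%0.4{thread_num}" would print the thread number right-justified and
   zero-padded to at least four characters, e.g. "0003". */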
7744
7745// Structure holding the short name, long name, and corresponding data type
7746// for snprintf. A table of these will represent the entire valid keyword
7747// field types.
7748typedef struct kmp_affinity_format_field_t {
7749 char short_name; // from spec e.g., L -> thread level
7750 const char *long_name; // from spec thread_level -> thread level
7751 char field_format; // data type for snprintf (typically 'd' or 's'
7752 // for integer or string)
7753} kmp_affinity_format_field_t;
7754
7755static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
7756#if KMP_AFFINITY_SUPPORTED
7757 {'A', "thread_affinity", 's'},
7758#endif
7759 {'t', "team_num", 'd'},
7760 {'T', "num_teams", 'd'},
7761 {'L', "nesting_level", 'd'},
7762 {'n', "thread_num", 'd'},
7763 {'N', "num_threads", 'd'},
7764 {'a', "ancestor_tnum", 'd'},
7765 {'H', "host", 's'},
7766 {'P', "process_id", 'd'},
7767 {'i', "native_thread_id", 'd'}};
7768
7769// Return the number of characters it takes to hold field
7770static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
7771 const char **ptr,
7772 kmp_str_buf_t *field_buffer) {
7773 int rc, format_index, field_value;
7774 const char *width_left, *width_right;
7775 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
7776 static const int FORMAT_SIZE = 20;
7777 char format[FORMAT_SIZE] = {0};
7778 char absolute_short_name = 0;
7779
7780 KMP_DEBUG_ASSERT(gtid >= 0);
7781 KMP_DEBUG_ASSERT(th);
7782 KMP_DEBUG_ASSERT(**ptr == '%');
7783 KMP_DEBUG_ASSERT(field_buffer);
7784
7785 __kmp_str_buf_clear(field_buffer);
7786
7787 // Skip the initial %
7788 (*ptr)++;
7789
7790 // Check for %% first
7791 if (**ptr == '%') {
7792 __kmp_str_buf_cat(field_buffer, "%", 1);
7793 (*ptr)++; // skip over the second %
7794 return 1;
7795 }
7796
7797 // Parse field modifiers if they are present
7798 pad_zeros = false;
7799 if (**ptr == '0') {
7800 pad_zeros = true;
7801 (*ptr)++; // skip over 0
7802 }
7803 right_justify = false;
7804 if (**ptr == '.') {
7805 right_justify = true;
7806 (*ptr)++; // skip over .
7807 }
7808 // Parse width of field: [width_left, width_right)
7809 width_left = width_right = NULL;
7810 if (**ptr >= '0' && **ptr <= '9') {
7811 width_left = *ptr;
7812 SKIP_DIGITS(*ptr);
7813 width_right = *ptr;
7814 }
7815
7816 // Create the format for KMP_SNPRINTF based on flags parsed above
7817 format_index = 0;
7818 format[format_index++] = '%';
7819 if (!right_justify)
7820 format[format_index++] = '-';
7821 if (pad_zeros)
7822 format[format_index++] = '0';
7823 if (width_left && width_right) {
7824 int i = 0;
7825 // Only allow 8 digit number widths.
7826 // This also prevents overflowing the format buffer
7827 while (i < 8 && width_left < width_right) {
7828 format[format_index++] = *width_left;
7829 width_left++;
7830 i++;
7831 }
7832 }
7833
7834 // Parse a name (long or short)
7835 // Canonicalize the name into absolute_short_name
7836 found_valid_name = false;
7837 parse_long_name = (**ptr == '{');
7838 if (parse_long_name)
7839 (*ptr)++; // skip initial left brace
7840 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
7841 sizeof(__kmp_affinity_format_table[0]);
7842 ++i) {
7843 char short_name = __kmp_affinity_format_table[i].short_name;
7844 const char *long_name = __kmp_affinity_format_table[i].long_name;
7845 char field_format = __kmp_affinity_format_table[i].field_format;
7846 if (parse_long_name) {
7847 int length = KMP_STRLEN(long_name);
7848 if (strncmp(*ptr, long_name, length) == 0) {
7849 found_valid_name = true;
7850 (*ptr) += length; // skip the long name
7851 }
7852 } else if (**ptr == short_name) {
7853 found_valid_name = true;
7854 (*ptr)++; // skip the short name
7855 }
7856 if (found_valid_name) {
7857 format[format_index++] = field_format;
7858 format[format_index++] = '\0';
7859 absolute_short_name = short_name;
7860 break;
7861 }
7862 }
7863 if (parse_long_name) {
7864 if (**ptr != '}') {
7865 absolute_short_name = 0;
7866 } else {
7867 (*ptr)++; // skip over the right brace
7868 }
7869 }
7870
7871 // Attempt to fill the buffer with the requested
7872 // value using snprintf within __kmp_str_buf_print()
7873 switch (absolute_short_name) {
7874 case 't':
7875 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
7876 break;
7877 case 'T':
7878 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
7879 break;
7880 case 'L':
7881 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
7882 break;
7883 case 'n':
7884 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
7885 break;
7886 case 'H': {
7887 static const int BUFFER_SIZE = 256;
7888 char buf[BUFFER_SIZE];
7889 __kmp_expand_host_name(buf, BUFFER_SIZE);
7890 rc = __kmp_str_buf_print(field_buffer, format, buf);
7891 } break;
7892 case 'P':
7893 rc = __kmp_str_buf_print(field_buffer, format, getpid());
7894 break;
7895 case 'i':
7896 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
7897 break;
7898 case 'N':
7899 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
7900 break;
7901 case 'a':
7902 field_value =
7903 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
7904 rc = __kmp_str_buf_print(field_buffer, format, field_value);
7905 break;
7906#if KMP_AFFINITY_SUPPORTED
7907 case 'A': {
7908 kmp_str_buf_t buf;
7909 __kmp_str_buf_init(&buf);
7910 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
7911 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
7912 __kmp_str_buf_free(&buf);
7913 } break;
7914#endif
7915 default:
7916 // According to the spec, if an implementation does not have info for a
7917 // field type, then "undefined" is printed
7918 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
7919 // Skip the field
7920 if (parse_long_name) {
7921 SKIP_TOKEN(*ptr);
7922 if (**ptr == '}')
7923 (*ptr)++;
7924 } else {
7925 (*ptr)++;
7926 }
7927 }
7928
7929 KMP_ASSERT(format_index <= FORMAT_SIZE);
7930 return rc;
7931}
7932
7933/*
7934 * Return the number of characters needed to hold the affinity string
7935 * (not including the terminating null byte).
7936 * The resulting string is printed to buffer, which the caller can then
7937 * handle afterwards.
7938*/
7939size_t __kmp_aux_capture_affinity(int gtid, const char *format,
7940 kmp_str_buf_t *buffer) {
7941 const char *parse_ptr;
7942 size_t retval;
7943 const kmp_info_t *th;
7944 kmp_str_buf_t field;
7945
7946 KMP_DEBUG_ASSERT(buffer);
7947 KMP_DEBUG_ASSERT(gtid >= 0);
7948
7949 __kmp_str_buf_init(&field);
7950 __kmp_str_buf_clear(buffer);
7951
7952 th = __kmp_threads[gtid];
7953 retval = 0;
7954
7955 // If format is NULL or zero-length string, then we use
7956 // affinity-format-var ICV
7957 parse_ptr = format;
7958 if (parse_ptr == NULL || *parse_ptr == '\0') {
7959 parse_ptr = __kmp_affinity_format;
7960 }
7961 KMP_DEBUG_ASSERT(parse_ptr);
7962
7963 while (*parse_ptr != '\0') {
7964 // Parse a field
7965 if (*parse_ptr == '%') {
7966 // Put field in the buffer
7967 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
7968 __kmp_str_buf_catbuf(buffer, &field);
7969 retval += rc;
7970 } else {
7971 // Put literal character in buffer
7972 __kmp_str_buf_cat(buffer, parse_ptr, 1);
7973 retval++;
7974 parse_ptr++;
7975 }
7976 }
7977 __kmp_str_buf_free(&field);
7978 return retval;
7979}
7980
7981// Displays the affinity string to stdout
7982void __kmp_aux_display_affinity(int gtid, const char *format) {
7983 kmp_str_buf_t buf;
7984 __kmp_str_buf_init(&buf);
7985 __kmp_aux_capture_affinity(gtid, format, &buf);
7986 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
7987 __kmp_str_buf_free(&buf);
7988}
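/* Usage sketch (editor's example; mirrors the buffer handling used in
   __kmp_aux_display_affinity above):

     kmp_str_buf_t buf;
     __kmp_str_buf_init(&buf);
     size_t len = __kmp_aux_capture_affinity(gtid, "%{host} %n", &buf);
     // buf.str now holds the expanded string; len is its length, not
     // counting the terminating null byte.
     __kmp_str_buf_free(&buf);
*/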
7989#endif // OMP_50_ENABLED
7990
Jim Cownie5e8470a2013-09-27 10:38:44 +00007991/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007992
Jonathan Peyton30419822017-05-12 18:01:32 +00007993void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
7994 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007995#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007996 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007997#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007998 int bt_set;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007999
Jonathan Peyton30419822017-05-12 18:01:32 +00008000 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008001
Jonathan Peyton30419822017-05-12 18:01:32 +00008002 /* Normalize and set blocktime for the teams */
8003 if (blocktime < KMP_MIN_BLOCKTIME)
8004 blocktime = KMP_MIN_BLOCKTIME;
8005 else if (blocktime > KMP_MAX_BLOCKTIME)
8006 blocktime = KMP_MAX_BLOCKTIME;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008007
Jonathan Peyton30419822017-05-12 18:01:32 +00008008 set__blocktime_team(thread->th.th_team, tid, blocktime);
8009 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008010
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008011#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00008012 /* Calculate and set blocktime intervals for the teams */
8013 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008014
Jonathan Peyton30419822017-05-12 18:01:32 +00008015 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8016 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008017#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00008018
Jonathan Peyton30419822017-05-12 18:01:32 +00008019 /* Record that blocktime has been explicitly set */
8020 bt_set = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008021
Jonathan Peyton30419822017-05-12 18:01:32 +00008022 set__bt_set_team(thread->th.th_team, tid, bt_set);
8023 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008024#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00008025 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8026 "bt_intervals=%d, monitor_updates=%d\n",
8027 __kmp_gtid_from_tid(tid, thread->th.th_team),
8028 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8029 __kmp_monitor_wakeups));
Samuel Antao33515192016-10-20 13:20:17 +00008030#else
Jonathan Peyton30419822017-05-12 18:01:32 +00008031 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8032 __kmp_gtid_from_tid(tid, thread->th.th_team),
8033 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008034#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00008035}
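// Illustrative only: typical ways the blocktime handled above ends up changing
// at the user level. KMP_BLOCKTIME in the environment is read at startup,
// while the kmp_set_blocktime() extension (declared in omp.h) is expected to
// reach __kmp_aux_set_blocktime for the calling thread at run time.
// Out-of-range requests are clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME]
// as shown above; a value of 0 makes workers sleep immediately after a
// parallel region.
#if 0
#include <omp.h>

void tune_blocktime(void) {
  kmp_set_blocktime(0); // same effect as KMP_BLOCKTIME=0 for this thread
#pragma omp parallel
  { /* ... */ }

  kmp_set_blocktime(200); // spin ~200 ms before sleeping (default-like value)
#pragma omp parallel
  { /* ... */ }
}
#endif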
8036
Jonathan Peyton30419822017-05-12 18:01:32 +00008037void __kmp_aux_set_defaults(char const *str, int len) {
8038 if (!__kmp_init_serial) {
8039 __kmp_serial_initialize();
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00008040 }
Jonathan Peyton30419822017-05-12 18:01:32 +00008041 __kmp_env_initialize(str);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008042
Jonathan Peyton30419822017-05-12 18:01:32 +00008043 if (__kmp_settings
Jim Cownie5e8470a2013-09-27 10:38:44 +00008044#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00008045 || __kmp_display_env || __kmp_display_env_verbose
Jim Cownie5e8470a2013-09-27 10:38:44 +00008046#endif // OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00008047 ) {
8048 __kmp_env_print();
8049 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00008050} // __kmp_aux_set_defaults
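// Illustrative only: kmp_set_defaults() is the user-visible extension that is
// expected to funnel into __kmp_aux_set_defaults, re-parsing the given string
// as if it had come from the environment. Because it may trigger serial
// initialization (see above), it is typically called before the first parallel
// region; settings applied after the runtime is fully configured may have no
// effect. The setting shown is an arbitrary example.
#if 0
#include <omp.h>

int main() {
  kmp_set_defaults("KMP_BLOCKTIME=0");
#pragma omp parallel
  { /* ... */ }
  return 0;
}
#endif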
8051
8052/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00008053/* internal fast reduction routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00008054
Jim Cownie5e8470a2013-09-27 10:38:44 +00008055PACKED_REDUCTION_METHOD_T
Jonathan Peyton30419822017-05-12 18:01:32 +00008056__kmp_determine_reduction_method(
8057 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8058 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8059 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00008060
Jonathan Peyton30419822017-05-12 18:01:32 +00008061 // Default reduction method: critical construct ( lck != NULL, like in current
8062 // PAROPT )
8063 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8064 // can be selected by RTL
8065 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8066 // can be selected by RTL
8067 // Finally, it's up to OpenMP RTL to make a decision on which method to select
8068 // among those generated by PAROPT.
Jim Cownie5e8470a2013-09-27 10:38:44 +00008069
Jonathan Peyton30419822017-05-12 18:01:32 +00008070 PACKED_REDUCTION_METHOD_T retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008071
Jonathan Peyton30419822017-05-12 18:01:32 +00008072 int team_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008073
Jonathan Peyton30419822017-05-12 18:01:32 +00008074 KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
8075 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
Jim Cownie5e8470a2013-09-27 10:38:44 +00008076
Jonathan Peyton30419822017-05-12 18:01:32 +00008077#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8078 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
8079#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
Jim Cownie5e8470a2013-09-27 10:38:44 +00008080
Jonathan Peyton30419822017-05-12 18:01:32 +00008081 retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008082
Jonathan Peyton30419822017-05-12 18:01:32 +00008083 // another way of getting the team size (with 1 dynamic dereference) is slower
8084 team_size = __kmp_get_team_num_threads(global_tid);
8085 if (team_size == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00008086
Jonathan Peyton30419822017-05-12 18:01:32 +00008087 retval = empty_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008088
Jonathan Peyton30419822017-05-12 18:01:32 +00008089 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00008090
Jonathan Peyton30419822017-05-12 18:01:32 +00008091 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008092
Jonathan Peyton30419822017-05-12 18:01:32 +00008093#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00008094
Kamil Rytarowskia56ac942018-12-09 16:40:33 +00008095#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
Kamil Rytarowski7e1ea992018-12-09 16:46:48 +00008096 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
Jim Cownie5e8470a2013-09-27 10:38:44 +00008097
Jonathan Peyton30419822017-05-12 18:01:32 +00008098 int teamsize_cutoff = 4;
Jonathan Peyton91b78702015-06-08 19:39:07 +00008099
Jonathan Peyton492e0a32017-06-13 17:17:26 +00008100#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00008101 if (__kmp_mic_type != non_mic) {
8102 teamsize_cutoff = 8;
8103 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00008104#endif
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00008105 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton30419822017-05-12 18:01:32 +00008106 if (tree_available) {
8107 if (team_size <= teamsize_cutoff) {
8108 if (atomic_available) {
8109 retval = atomic_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008110 }
Jonathan Peyton30419822017-05-12 18:01:32 +00008111 } else {
8112 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8113 }
8114 } else if (atomic_available) {
8115 retval = atomic_reduce_block;
8116 }
8117#else
8118#error "Unknown or unsupported OS"
Jonathan Peyton17e53b92018-12-10 18:26:50 +00008119#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8120 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
Jim Cownie5e8470a2013-09-27 10:38:44 +00008121
Jonathan Peyton30419822017-05-12 18:01:32 +00008122#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
8123
Andrey Churbanov855d0982018-11-07 12:27:38 +00008124#if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD
Jonathan Peyton30419822017-05-12 18:01:32 +00008125
8126 // basic tuning
8127
8128 if (atomic_available) {
8129 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8130 retval = atomic_reduce_block;
8131 }
8132 } // otherwise: use critical section
8133
8134#elif KMP_OS_DARWIN
8135
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00008136 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton30419822017-05-12 18:01:32 +00008137 if (atomic_available && (num_vars <= 3)) {
8138 retval = atomic_reduce_block;
8139 } else if (tree_available) {
8140 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8141 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8142 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8143 }
8144 } // otherwise: use critical section
8145
8146#else
8147#error "Unknown or unsupported OS"
8148#endif
8149
8150#else
8151#error "Unknown or unsupported architecture"
8152#endif
8153 }
8154
8155 // KMP_FORCE_REDUCTION
8156
8157 // If the team is serialized (team_size == 1), ignore the forced reduction
8158 // method and stay with the unsynchronized method (empty_reduce_block)
8159 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8160 team_size != 1) {
8161
8162 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8163
8164 int atomic_available, tree_available;
8165
8166 switch ((forced_retval = __kmp_force_reduction_method)) {
8167 case critical_reduce_block:
8168 KMP_ASSERT(lck); // lck should be != 0
8169 break;
8170
8171 case atomic_reduce_block:
8172 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8173 if (!atomic_available) {
8174 KMP_WARNING(RedMethodNotSupported, "atomic");
8175 forced_retval = critical_reduce_block;
8176 }
8177 break;
8178
8179 case tree_reduce_block:
8180 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8181 if (!tree_available) {
8182 KMP_WARNING(RedMethodNotSupported, "tree");
8183 forced_retval = critical_reduce_block;
8184 } else {
8185#if KMP_FAST_REDUCTION_BARRIER
8186 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8187#endif
8188 }
8189 break;
8190
8191 default:
8192 KMP_ASSERT(0); // "unsupported method specified"
Jim Cownie5e8470a2013-09-27 10:38:44 +00008193 }
8194
Jonathan Peyton30419822017-05-12 18:01:32 +00008195 retval = forced_retval;
8196 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00008197
Jonathan Peyton30419822017-05-12 18:01:32 +00008198 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00008199
Jonathan Peyton30419822017-05-12 18:01:32 +00008200#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8201#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8202
8203 return (retval);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008204}
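// A simplified, illustrative restatement of the selection rules documented at
// the top of __kmp_determine_reduction_method (not the code actually used, and
// the cutoff is an example value): serialized teams take the unsynchronized
// path, large teams prefer the tree method with the reduction barrier when the
// compiler generated it, small teams prefer atomics when available, and the
// critical-section method is the always-available fallback. At run time the
// choice can also be forced, e.g. via the KMP_FORCE_REDUCTION setting handled
// above.
#if 0
enum simplified_method { use_critical, use_atomic, use_tree, use_empty };

static enum simplified_method pick_reduction_method(int team_size,
                                                    int atomic_available,
                                                    int tree_available) {
  const int teamsize_cutoff = 4; // example value; real cutoffs vary by target
  if (team_size == 1)
    return use_empty; // serialized team: no synchronization needed
  if (tree_available && team_size > teamsize_cutoff)
    return use_tree; // large team: tree reduction with reduction barrier
  if (atomic_available)
    return use_atomic; // small team: per-variable atomic updates
  return use_critical; // fallback: guard the reduction with a critical section
}
#endif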
8205
8206// this function is for testing set/get/determine reduce method
Jonathan Peyton30419822017-05-12 18:01:32 +00008207kmp_int32 __kmp_get_reduce_method(void) {
8208 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008209}
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00008210
8211#if OMP_50_ENABLED
8212
8213// Soft pause sets up threads to ignore blocktime and just go to sleep.
8214// Spin-wait code checks __kmp_pause_status and reacts accordingly.
8215void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8216
8217// Hard pause shuts down the runtime completely. Resume happens naturally when
8218// OpenMP is used subsequently.
8219void __kmp_hard_pause() {
8220 __kmp_pause_status = kmp_hard_paused;
8221 __kmp_internal_end_thread(-1);
8222}
8223
8224 // Soft resume resets __kmp_pause_status to kmp_not_paused and wakes up any
8225 // sleeping threads.
8225void __kmp_resume_if_soft_paused() {
8226 if (__kmp_pause_status == kmp_soft_paused) {
8227 __kmp_pause_status = kmp_not_paused;
8228
8229 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8230 kmp_info_t *thread = __kmp_threads[gtid];
8231 if (thread) { // Wake it if sleeping
8232 kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
8233 if (fl.is_sleeping())
8234 fl.resume(gtid);
8235 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
8236 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
8237 } else { // thread holds the lock and may sleep soon
8238 do { // until either the thread sleeps, or we can get the lock
8239 if (fl.is_sleeping()) {
8240 fl.resume(gtid);
8241 break;
8242 } else if (__kmp_try_suspend_mx(thread)) {
8243 __kmp_unlock_suspend_mx(thread);
8244 break;
8245 }
8246 } while (1);
8247 }
8248 }
8249 }
8250 }
8251}
8252
8253// This function is called via __kmpc_pause_resource. Returns 0 if successful.
8254// TODO: add warning messages
8255int __kmp_pause_resource(kmp_pause_status_t level) {
8256 if (level == kmp_not_paused) { // requesting resume
8257 if (__kmp_pause_status == kmp_not_paused) {
8258 // error message about runtime not being paused, so can't resume
8259 return 1;
8260 } else {
8261 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
8262 __kmp_pause_status == kmp_hard_paused);
8263 __kmp_pause_status = kmp_not_paused;
8264 return 0;
8265 }
8266 } else if (level == kmp_soft_paused) { // requesting soft pause
8267 if (__kmp_pause_status != kmp_not_paused) {
8268 // error message about already being paused
8269 return 1;
8270 } else {
8271 __kmp_soft_pause();
8272 return 0;
8273 }
8274 } else if (level == kmp_hard_paused) { // requesting hard pause
8275 if (__kmp_pause_status != kmp_not_paused) {
8276 // error message about already being paused
8277 return 1;
8278 } else {
8279 __kmp_hard_pause();
8280 return 0;
8281 }
8282 } else {
8283 // error message about invalid level
8284 return 1;
8285 }
8286}
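// Illustrative only: the OpenMP 5.0 pause API that __kmpc_pause_resource and
// __kmp_pause_resource are meant to back (the exact routing is assumed here).
// A soft pause lets threads sleep but keeps runtime state; a hard pause tears
// the runtime down until the next OpenMP construct re-initializes it. A zero
// return value means the request was honored, matching the convention above.
#if 0
#include <omp.h>
#include <stdio.h>

int main() {
#pragma omp parallel
  { /* ... first phase ... */ }

  // Long serial phase ahead: release threads and other runtime resources.
  if (omp_pause_resource_all(omp_pause_hard) != 0)
    printf("pause request was not honored\n");

  /* ... long serial computation ... */

#pragma omp parallel // the runtime restarts transparently on next use
  { /* ... second phase ... */ }
  return 0;
}
#endif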
8287
8288#endif // OMP_50_ENABLED