/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
    "5.0 (201611)";
#elif OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

#if KMP_USE_MONITOR
kmp_info_t __kmp_monitor;
#endif

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

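/* Same as __kmp_get_global_thread_id(), but if the calling thread has not yet
   been assigned a gtid (KMP_GTID_DNE), register it as a new root under the
   bootstrap initialization lock so a valid, non-negative gtid is always
   returned. */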
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

/* ------------------------------------------------------------------------ */

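/* Spin forever, yielding the processor; used when aborting so a thread never
   returns into user code. */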
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512

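/* Print one storage-map line describing the address range [p1, p2] of the
   given size, under the stdio bootstrap lock; when KMP_PRINT_DATA_PLACEMENT is
   enabled, also report the host NUMA node(s) backing the range. */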
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

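/* Emit a formatted "OMP warning" message on kmp_err, unless warnings are
   disabled (__kmp_generate_warnings == kmp_warnings_off). */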
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

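// Abort the whole process: dump the debug buffer if requested, then raise
// SIGABRT (Windows) or call abort(); this function does not return.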
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS by default abort() causes pop-up error box, which stalls
       nightly testing. Unfortunately, we cannot reliably suppress pop-up error
       boxes. _set_abort_behavior() works well, but this function is not
       available in VS7 (this is not a problem for a DLL, but it is a problem
       for the static OpenMP RTL). SetErrorMode (and so, timelimit utility)
       does not help, at least in some versions of MS C RTL.

       It seems the following sequence is the only way to simulate abort() and
       avoid pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

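// Set up / tear down support for the OpenMP 5.0 memory allocators
// (memkind-backed when that library is available).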
static void __kmp_init_allocator() {
#if OMP_50_ENABLED
  __kmp_init_memkind();
#endif
}
static void __kmp_fini_allocator() {
#if OMP_50_ENABLED
  __kmp_fini_memkind();
#endif
}

/* ------------------------------------------------------------------------ */

#if KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one
  // calling ProcessExit or FreeLibrary). So, it might be safe to access the
  // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some
  // threads can be still alive here, although being about to be terminated. The
  // threads in the array with ds_thread==0 are most suspicious. Actually, it
  // can be not safe to access the __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      //   lpReserved == NULL when FreeLibrary() was called,
      //   lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      //   - the main function was compiled by an alternative compiler;
      //   - the main function was compiled by icl but without /Qopenmp
      //     (application with plugins);
      //   - application terminates by calling C exit(), Fortran CALL EXIT() or
      //     Fortran STOP.
      //   - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

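/* Returns nonzero if the calling thread should execute the SINGLE block,
   i.e. the team is serialized or this thread won the atomic race on the
   team's t_construct counter; returns zero otherwise. */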
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}

/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked on that
   earlier within critical section forkjoin */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

#if OMP_50_ENABLED
  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
#endif

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We try to avoid unnecessary writes to the relevant cache line in the
// team structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // during the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

Jonathan Peyton30419822017-05-12 18:01:32 +00001161static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1162 int realloc); // forward declaration
Jim Cownie5e8470a2013-09-27 10:38:44 +00001163
Jonathan Peyton30419822017-05-12 18:01:32 +00001164/* Run a parallel region that has been serialized, so runs only in a team of the
1165 single master thread. */
1166void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1167 kmp_info_t *this_thr;
1168 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001169
Jonathan Peyton30419822017-05-12 18:01:32 +00001170 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001171
Jonathan Peyton30419822017-05-12 18:01:32 +00001172 /* Skip all this code for autopar serialized loops since it results in
1173 unacceptable overhead */
1174 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1175 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001176
Jonathan Peyton30419822017-05-12 18:01:32 +00001177 if (!TCR_4(__kmp_init_parallel))
1178 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001179
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001180#if OMP_50_ENABLED
1181 __kmp_resume_if_soft_paused();
1182#endif
1183
Jonathan Peyton30419822017-05-12 18:01:32 +00001184 this_thr = __kmp_threads[global_tid];
1185 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001186
Jonathan Peyton30419822017-05-12 18:01:32 +00001187 /* utilize the serialized team held by this thread */
1188 KMP_DEBUG_ASSERT(serial_team);
1189 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001190
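  // The serial team has no task team, so the master temporarily drops its
  // task team pointer for the duration of the serialized region.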
Jonathan Peyton30419822017-05-12 18:01:32 +00001191 if (__kmp_tasking_mode != tskm_immediate_exec) {
1192 KMP_DEBUG_ASSERT(
1193 this_thr->th.th_task_team ==
1194 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1195 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1196 NULL);
1197 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1198 "team %p, new task_team = NULL\n",
1199 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1200 this_thr->th.th_task_team = NULL;
1201 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001202
1203#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001204 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1205 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1206 proc_bind = proc_bind_false;
1207 } else if (proc_bind == proc_bind_default) {
1208 // No proc_bind clause was specified, so use the current value
1209 // of proc-bind-var for this parallel region.
1210 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1211 }
1212 // Reset for next parallel region
1213 this_thr->th.th_set_proc_bind = proc_bind_default;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001214#endif /* OMP_40_ENABLED */
1215
Joachim Protze82e94a52017-11-01 10:08:30 +00001216#if OMPT_SUPPORT
Jonathan Peyton3574f282018-10-04 14:57:04 +00001217 ompt_data_t ompt_parallel_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001218 ompt_data_t *implicit_task_data;
1219 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1220 if (ompt_enabled.enabled &&
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001221 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
Joachim Protze82e94a52017-11-01 10:08:30 +00001222
1223 ompt_task_info_t *parent_task_info;
1224 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1225
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001226 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001227 if (ompt_enabled.ompt_callback_parallel_begin) {
1228 int team_size = 1;
1229
1230 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1231 &(parent_task_info->task_data), &(parent_task_info->frame),
Joachim Protze489cdb72018-09-10 14:34:54 +00001232 &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
1233 codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001234 }
1235 }
1236#endif // OMPT_SUPPORT
1237
Jonathan Peyton30419822017-05-12 18:01:32 +00001238 if (this_thr->th.th_team != serial_team) {
1239 // Nested level will be an index in the nested nthreads array
1240 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001241
Jonathan Peyton30419822017-05-12 18:01:32 +00001242 if (serial_team->t.t_serialized) {
1243 /* this serial team was already used
1244 TODO increase performance by making these locks more specific */
1245 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001246
Jonathan Peyton30419822017-05-12 18:01:32 +00001247 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001248
Jonathan Peyton30419822017-05-12 18:01:32 +00001249 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001250#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001251 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001252#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001253#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001254 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001255#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001256 &this_thr->th.th_current_task->td_icvs,
1257 0 USE_NESTED_HOT_ARG(NULL));
1258 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1259 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001260
Jonathan Peyton30419822017-05-12 18:01:32 +00001261 /* setup new serialized team and install it */
1262 new_team->t.t_threads[0] = this_thr;
1263 new_team->t.t_parent = this_thr->th.th_team;
1264 serial_team = new_team;
1265 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001266
Jonathan Peyton30419822017-05-12 18:01:32 +00001267 KF_TRACE(
1268 10,
1269 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1270 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001271
Jonathan Peyton30419822017-05-12 18:01:32 +00001272 /* TODO the above breaks the requirement that if we run out of resources,
1273 then we can still guarantee that serialized teams are ok, since we may
1274 need to allocate a new one */
1275 } else {
1276 KF_TRACE(
1277 10,
1278 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1279 global_tid, serial_team));
1280 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001281
Jonathan Peyton30419822017-05-12 18:01:32 +00001282 /* we have to initialize this serial team */
1283 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1284 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1285 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1286 serial_team->t.t_ident = loc;
1287 serial_team->t.t_serialized = 1;
1288 serial_team->t.t_nproc = 1;
1289 serial_team->t.t_parent = this_thr->th.th_team;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00001290 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00001291 this_thr->th.th_team = serial_team;
1292 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001293
Jonathan Peyton30419822017-05-12 18:01:32 +00001294 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1295 this_thr->th.th_current_task));
1296 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1297 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001298
Jonathan Peyton30419822017-05-12 18:01:32 +00001299 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001300
Jonathan Peyton30419822017-05-12 18:01:32 +00001301 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1302 implicit task for each serialized task represented by
1303 team->t.t_serialized? */
1304 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1305 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001306
Jonathan Peyton30419822017-05-12 18:01:32 +00001307 // Thread value exists in the nested nthreads array for the next nested
1308 // level
1309 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1310 this_thr->th.th_current_task->td_icvs.nproc =
1311 __kmp_nested_nth.nth[level + 1];
1312 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001313
1314#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001315 if (__kmp_nested_proc_bind.used &&
1316 (level + 1 < __kmp_nested_proc_bind.used)) {
1317 this_thr->th.th_current_task->td_icvs.proc_bind =
1318 __kmp_nested_proc_bind.bind_types[level + 1];
1319 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001320#endif /* OMP_40_ENABLED */
1321
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001322#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001323 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001324#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001325 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001326
Jonathan Peyton30419822017-05-12 18:01:32 +00001327 /* set thread cache values */
1328 this_thr->th.th_team_nproc = 1;
1329 this_thr->th.th_team_master = this_thr;
1330 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001331
Jonathan Peyton30419822017-05-12 18:01:32 +00001332 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1333 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001334#if OMP_50_ENABLED
1335 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
1336#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001337
Jonathan Peyton30419822017-05-12 18:01:32 +00001338 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001339
Jonathan Peyton30419822017-05-12 18:01:32 +00001340 /* check if we need to allocate dispatch buffers stack */
1341 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1342 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1343 serial_team->t.t_dispatch->th_disp_buffer =
1344 (dispatch_private_info_t *)__kmp_allocate(
1345 sizeof(dispatch_private_info_t));
1346 }
1347 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001348
Jonathan Peyton30419822017-05-12 18:01:32 +00001349 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001350
Jonathan Peyton30419822017-05-12 18:01:32 +00001351 } else {
1352 /* this serialized team is already being used,
1353 * that's fine, just add another nested level */
1354 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1355 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1356 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1357 ++serial_team->t.t_serialized;
1358 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001359
Jonathan Peyton30419822017-05-12 18:01:32 +00001360 // Nested level will be an index in the nested nthreads array
1361 int level = this_thr->th.th_team->t.t_level;
1362 // Thread value exists in the nested nthreads array for the next nested
1363 // level
1364 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1365 this_thr->th.th_current_task->td_icvs.nproc =
1366 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001367 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001368 serial_team->t.t_level++;
1369 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1370 "of serial team %p to %d\n",
1371 global_tid, serial_team, serial_team->t.t_level));
1372
1373 /* allocate/push dispatch buffers stack */
1374 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1375 {
1376 dispatch_private_info_t *disp_buffer =
1377 (dispatch_private_info_t *)__kmp_allocate(
1378 sizeof(dispatch_private_info_t));
1379 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1380 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1381 }
1382 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1383
1384 KMP_MB();
1385 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001386#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001387 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001388#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001389
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001390#if OMP_50_ENABLED
1391 // Perform the display affinity functionality for
1392 // serialized parallel regions
1393 if (__kmp_display_affinity) {
1394 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1395 this_thr->th.th_prev_num_threads != 1) {
1396 // NULL means use the affinity-format-var ICV
1397 __kmp_aux_display_affinity(global_tid, NULL);
1398 this_thr->th.th_prev_level = serial_team->t.t_level;
1399 this_thr->th.th_prev_num_threads = 1;
1400 }
1401 }
1402#endif
1403
Jonathan Peyton30419822017-05-12 18:01:32 +00001404 if (__kmp_env_consistency_check)
1405 __kmp_push_parallel(global_tid, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001406#if OMPT_SUPPORT
1407 serial_team->t.ompt_team_info.master_return_address = codeptr;
1408 if (ompt_enabled.enabled &&
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001409 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1410 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001411
1412 ompt_lw_taskteam_t lw_taskteam;
1413 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1414 &ompt_parallel_data, codeptr);
1415
1416 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1417 // don't use lw_taskteam after linking. content was swapped
1418
1419 /* OMPT implicit task begin */
1420 implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
1421 if (ompt_enabled.ompt_callback_implicit_task) {
1422 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1423 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
Joachim Protze2b46d302019-01-15 15:36:53 +00001424 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001425 OMPT_CUR_TASK_INFO(this_thr)
1426 ->thread_num = __kmp_tid_from_gtid(global_tid);
Joachim Protze82e94a52017-11-01 10:08:30 +00001427 }
1428
1429 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001430 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1431 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
Joachim Protze82e94a52017-11-01 10:08:30 +00001432 }
1433#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001434}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001435
Jim Cownie5e8470a2013-09-27 10:38:44 +00001436/* most of the work for a fork */
1437/* return true if we really went parallel, false if serialized */
Jonathan Peyton30419822017-05-12 18:01:32 +00001438int __kmp_fork_call(ident_t *loc, int gtid,
1439 enum fork_context_e call_context, // Intel, GNU, ...
Joachim Protze82e94a52017-11-01 10:08:30 +00001440 kmp_int32 argc, microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001441/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001442#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001443 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001444#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001445 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001446#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001447 ) {
1448 void **argv;
1449 int i;
1450 int master_tid;
1451 int master_this_cons;
1452 kmp_team_t *team;
1453 kmp_team_t *parent_team;
1454 kmp_info_t *master_th;
1455 kmp_root_t *root;
1456 int nthreads;
1457 int master_active;
1458 int master_set_numthreads;
1459 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001460#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001461 int active_level;
1462 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001463#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001464#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001465 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001466#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001467 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001468 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001469 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001470
Jonathan Peyton30419822017-05-12 18:01:32 +00001471 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1472 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1473 /* Some systems prefer the stack for the root thread(s) to start with */
1474 /* some gap from the parent stack to prevent false sharing. */
1475 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1476 /* These 2 lines below are so this does not get optimized out */
1477 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1478 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001479 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001480
1481 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001482 KMP_DEBUG_ASSERT(
1483 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1484 if (!TCR_4(__kmp_init_parallel))
1485 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001486
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00001487#if OMP_50_ENABLED
1488 __kmp_resume_if_soft_paused();
1489#endif
1490
Jim Cownie5e8470a2013-09-27 10:38:44 +00001491 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001492 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1493 // shutdown
1494 parent_team = master_th->th.th_team;
1495 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001496 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001497 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001498 master_active = root->r.r_active;
1499 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001500
1501#if OMPT_SUPPORT
Jonathan Peyton3574f282018-10-04 14:57:04 +00001502 ompt_data_t ompt_parallel_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001503 ompt_data_t *parent_task_data;
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001504 ompt_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001505 ompt_data_t *implicit_task_data;
1506 void *return_address = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001507
Joachim Protze82e94a52017-11-01 10:08:30 +00001508 if (ompt_enabled.enabled) {
1509 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1510 NULL, NULL);
1511 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001512 }
1513#endif
1514
Jim Cownie5e8470a2013-09-27 10:38:44 +00001515 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001516 level = parent_team->t.t_level;
1517 // used to launch non-serial teams even if nesting is not allowed
1518 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001519#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001520 // needed to check nesting inside the teams construct
1521 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001522#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001523#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001524 p_hot_teams = &master_th->th.th_hot_teams;
1525 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1526 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1527 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1528 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001529 // it is either actual or not needed (when active_level > 0)
1530 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001531 }
1532#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001533
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001534#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001535 if (ompt_enabled.enabled) {
1536 if (ompt_enabled.ompt_callback_parallel_begin) {
1537 int team_size = master_set_numthreads
1538 ? master_set_numthreads
1539 : get__nproc_2(parent_team, master_tid);
1540 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1541 parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
1542 OMPT_INVOKER(call_context), return_address);
1543 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001544 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001545 }
1546#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001547
Jim Cownie5e8470a2013-09-27 10:38:44 +00001548 master_th->th.th_ident = loc;
1549
1550#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001551 if (master_th->th.th_teams_microtask && ap &&
1552 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1553 // AC: This is start of parallel that is nested inside teams construct.
1554 // The team is actual (hot), all workers are ready at the fork barrier.
1555 // No lock needed to initialize the team a bit, then free workers.
1556 parent_team->t.t_ident = loc;
1557 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1558 parent_team->t.t_argc = argc;
1559 argv = (void **)parent_team->t.t_argv;
1560 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001561/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001562#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001563 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001564#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001565 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001566#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001567 // Increment our nested depth levels, but do not increase the serialization
1568 if (parent_team == master_th->th.th_serial_team) {
1569 // AC: we are in serialized parallel
1570 __kmpc_serialized_parallel(loc, gtid);
1571 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1572 // AC: need this in order for enquiry functions to work
1573 // correctly; will restore at join time
1574 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001575#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001576 void *dummy;
1577 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001578
Jonathan Peyton30419822017-05-12 18:01:32 +00001579 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001580
Joachim Protze82e94a52017-11-01 10:08:30 +00001581 if (ompt_enabled.enabled) {
1582 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1583 &ompt_parallel_data, return_address);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001584 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001585
Joachim Protze82e94a52017-11-01 10:08:30 +00001586 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1587 // don't use lw_taskteam after linking. content was swaped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001588
Jonathan Peyton30419822017-05-12 18:01:32 +00001589 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001590 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1591 if (ompt_enabled.ompt_callback_implicit_task) {
1592 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1593 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
Joachim Protze2b46d302019-01-15 15:36:53 +00001594 implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001595 OMPT_CUR_TASK_INFO(master_th)
1596 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001597 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001598
Jonathan Peyton30419822017-05-12 18:01:32 +00001599 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001600 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001601 } else {
1602 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001603 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001604#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001605
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001606 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001607 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1608 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1609 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1610#if OMPT_SUPPORT
1611 ,
1612 exit_runtime_p
1613#endif
1614 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001615 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001616
Jonathan Peyton30419822017-05-12 18:01:32 +00001617#if OMPT_SUPPORT
1618 *exit_runtime_p = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001619 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001620 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001621 if (ompt_enabled.ompt_callback_implicit_task) {
1622 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1623 ompt_scope_end, NULL, implicit_task_data, 1,
Joachim Protze2b46d302019-01-15 15:36:53 +00001624 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00001625 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001626 __ompt_lw_taskteam_unlink(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001627
Joachim Protze82e94a52017-11-01 10:08:30 +00001628 if (ompt_enabled.ompt_callback_parallel_end) {
1629 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1630 OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
1631 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001632 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001633 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001634 }
1635#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001636 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001637 }
1638
1639 parent_team->t.t_pkfn = microtask;
Jonathan Peyton30419822017-05-12 18:01:32 +00001640 parent_team->t.t_invoke = invoker;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00001641 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00001642 parent_team->t.t_active_level++;
1643 parent_team->t.t_level++;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001644#if OMP_50_ENABLED
1645 parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
1646#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001647
1648 /* Change number of threads in the team if requested */
1649 if (master_set_numthreads) { // The parallel has num_threads clause
1650 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
1651 // AC: can only reduce the number of threads dynamically, can't increase
1652 kmp_info_t **other_threads = parent_team->t.t_threads;
1653 parent_team->t.t_nproc = master_set_numthreads;
1654 for (i = 0; i < master_set_numthreads; ++i) {
1655 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1656 }
1657 // Keep extra threads hot in the team for possible next parallels
1658 }
1659 master_th->th.th_set_nproc = 0;
1660 }
1661
1662#if USE_DEBUGGER
1663 if (__kmp_debugging) { // Let debugger override number of threads.
1664 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001665 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001666 master_set_numthreads = nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001667 }
1668 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001669#endif
1670
1671 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1672 "master_th=%p, gtid=%d\n",
1673 root, parent_team, master_th, gtid));
1674 __kmp_internal_fork(loc, gtid, parent_team);
1675 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1676 "master_th=%p, gtid=%d\n",
1677 root, parent_team, master_th, gtid));
1678
1679 /* Invoke microtask for MASTER thread */
1680 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1681 parent_team->t.t_id, parent_team->t.t_pkfn));
1682
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001683 if (!parent_team->t.t_invoke(gtid)) {
1684 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jonathan Peyton30419822017-05-12 18:01:32 +00001685 }
1686 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1687 parent_team->t.t_id, parent_team->t.t_pkfn));
1688 KMP_MB(); /* Flush all pending memory write invalidates. */
1689
1690 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1691
1692 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001693 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001694#endif /* OMP_40_ENABLED */
1695
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001696#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001697 if (__kmp_tasking_mode != tskm_immediate_exec) {
1698 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1699 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001700 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001701#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001702
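  // If the parent team's active level has already reached the
  // max-active-levels ICV, the new parallel region must be serialized
  // (executed by a single thread).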
Jonathan Peyton30419822017-05-12 18:01:32 +00001703 if (parent_team->t.t_active_level >=
1704 master_th->th.th_current_task->td_icvs.max_active_levels) {
1705 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001706 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001707#if OMP_40_ENABLED
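    // enter_teams is set when this fork is entering a teams construct (either
    // the implicit parallel forked by the teams master, or a parallel at the
    // teams level); such forks are not serialized below.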
Jonathan Peyton30419822017-05-12 18:01:32 +00001708 int enter_teams = ((ap == NULL && active_level == 0) ||
1709 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001710#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001711 nthreads =
1712 master_set_numthreads
1713 ? master_set_numthreads
1714 : get__nproc_2(
1715 parent_team,
1716 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001717
Jonathan Peyton30419822017-05-12 18:01:32 +00001718 // Check if we need to take forkjoin lock? (no need for serialized
1719 // parallel out of teams construct). This code moved here from
1720 // __kmp_reserve_threads() to speedup nested serialized parallels.
1721 if (nthreads > 1) {
Jonathan Peyton76b45e82019-02-28 20:47:21 +00001722 if ((get__max_active_levels(master_th) == 1 && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001723#if OMP_40_ENABLED
Jonathan Peyton76b45e82019-02-28 20:47:21 +00001724 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001725#endif /* OMP_40_ENABLED */
Jonathan Peyton76b45e82019-02-28 20:47:21 +00001726 )) ||
Jonathan Peyton30419822017-05-12 18:01:32 +00001727 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001728 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1729 " threads\n",
1730 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001731 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001732 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001733 }
1734 if (nthreads > 1) {
1735 /* determine how many new threads we can use */
1736 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001737 nthreads = __kmp_reserve_threads(
1738 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001739#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001740 /* AC: If we execute teams from a parallel region (on host), then
1741 teams should be created but each can only have 1 thread if
1742 nesting is disabled. If teams is called from a serial region, then
1743 teams and their threads should be created regardless of the
1744 nesting setting. */
1745 ,
1746 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001747#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001748 );
1749 if (nthreads == 1) {
1750 // Free lock for single thread execution here; for multi-thread
1751 // execution it will be freed later after team of threads created
1752 // and initialized
1753 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001754 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001755 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001756 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001757 KMP_DEBUG_ASSERT(nthreads > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001758
Jonathan Peyton30419822017-05-12 18:01:32 +00001759 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001760 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001761
Jim Cownie5e8470a2013-09-27 10:38:44 +00001762 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001763 if (nthreads == 1) {
1764/* josh todo: hypothetical question: what do we do for OS X*? */
1765#if KMP_OS_LINUX && \
1766 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1767 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001768#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001769 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1770#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1771 KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001772
Jonathan Peyton30419822017-05-12 18:01:32 +00001773 KA_TRACE(20,
1774 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001775
Jonathan Peyton30419822017-05-12 18:01:32 +00001776 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777
Jonathan Peyton30419822017-05-12 18:01:32 +00001778 if (call_context == fork_context_intel) {
1779 /* TODO this sucks, use the compiler itself to pass args! :) */
1780 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001781#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001782 if (!ap) {
1783 // revert change made in __kmpc_serialized_parallel()
1784 master_th->th.th_serial_team->t.t_level--;
1785// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001786
1787#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001788 void *dummy;
1789 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001790 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001791
Jonathan Peyton30419822017-05-12 18:01:32 +00001792 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001793
Joachim Protze82e94a52017-11-01 10:08:30 +00001794 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001795 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001796 &ompt_parallel_data, return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001797
Joachim Protze82e94a52017-11-01 10:08:30 +00001798 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1799 // don't use lw_taskteam after linking. content was swaped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001800
Joachim Protze82e94a52017-11-01 10:08:30 +00001801 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001802 exit_runtime_p = &(task_info->frame.exit_frame.ptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00001803 if (ompt_enabled.ompt_callback_implicit_task) {
1804 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1805 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
Joachim Protze2b46d302019-01-15 15:36:53 +00001806 &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001807 OMPT_CUR_TASK_INFO(master_th)
1808 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001809 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001810
Jonathan Peyton30419822017-05-12 18:01:32 +00001811 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001812 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001813 } else {
1814 exit_runtime_p = &dummy;
1815 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001816#endif
1817
Jonathan Peyton30419822017-05-12 18:01:32 +00001818 {
1819 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1820 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1821 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1822 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001823#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001824 ,
1825 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001826#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001827 );
1828 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001829
1830#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001831 if (ompt_enabled.enabled) {
1832 exit_runtime_p = NULL;
1833 if (ompt_enabled.ompt_callback_implicit_task) {
1834 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1835 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze2b46d302019-01-15 15:36:53 +00001836 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00001837 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001838
Jonathan Peyton30419822017-05-12 18:01:32 +00001839 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001840 if (ompt_enabled.ompt_callback_parallel_end) {
1841 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1842 OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
1843 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001844 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001845 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001846 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001847#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001848 } else if (microtask == (microtask_t)__kmp_teams_master) {
1849 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1850 master_th->th.th_serial_team);
1851 team = master_th->th.th_team;
1852 // team->t.t_pkfn = microtask;
1853 team->t.t_invoke = invoker;
1854 __kmp_alloc_argv_entries(argc, team, TRUE);
1855 team->t.t_argc = argc;
1856 argv = (void **)team->t.t_argv;
1857 if (ap) {
1858 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001859// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001860#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001861 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001862#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001863 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001864#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001865 } else {
1866 for (i = 0; i < argc; ++i)
1867 // Get args from parent team for teams construct
1868 argv[i] = parent_team->t.t_argv[i];
1869 }
1870 // AC: revert change made in __kmpc_serialized_parallel()
1871 // because initial code in teams should have level=0
1872 team->t.t_level--;
1873 // AC: call special invoker for outer "parallel" of teams construct
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001874 invoker(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001875 } else {
1876#endif /* OMP_40_ENABLED */
1877 argv = args;
1878 for (i = argc - 1; i >= 0; --i)
1879// TODO: revert workaround for Intel(R) 64 tracker #96
1880#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1881 *argv++ = va_arg(*ap, void *);
1882#else
1883 *argv++ = va_arg(ap, void *);
1884#endif
1885 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001886
1887#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001888 void *dummy;
1889 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001890 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001891
Jonathan Peyton30419822017-05-12 18:01:32 +00001892 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001893
Joachim Protze82e94a52017-11-01 10:08:30 +00001894 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001895 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001896 &ompt_parallel_data, return_address);
1897 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1898 // don't use lw_taskteam after linking. content was swapped
1899 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001900 exit_runtime_p = &(task_info->frame.exit_frame.ptr);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001901
Jonathan Peyton30419822017-05-12 18:01:32 +00001902 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001903 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1904 if (ompt_enabled.ompt_callback_implicit_task) {
1905 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1906 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
Joachim Protze2b46d302019-01-15 15:36:53 +00001907 implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00001908 OMPT_CUR_TASK_INFO(master_th)
1909 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001910 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001911
Jonathan Peyton30419822017-05-12 18:01:32 +00001912 /* OMPT state */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001913 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001914 } else {
1915 exit_runtime_p = &dummy;
1916 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001917#endif
1918
Jonathan Peyton30419822017-05-12 18:01:32 +00001919 {
1920 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1921 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1922 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001923#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001924 ,
1925 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001926#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001927 );
1928 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001929
1930#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001931 if (ompt_enabled.enabled) {
1932 *exit_runtime_p = NULL;
1933 if (ompt_enabled.ompt_callback_implicit_task) {
1934 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1935 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze2b46d302019-01-15 15:36:53 +00001936 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00001937 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001938
Joachim Protze82e94a52017-11-01 10:08:30 +00001939 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001940 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001941 if (ompt_enabled.ompt_callback_parallel_end) {
1942 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1943 &ompt_parallel_data, parent_task_data,
1944 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001945 }
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001946 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001947 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001948#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001950 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001951#endif /* OMP_40_ENABLED */
1952 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001953#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001954 ompt_lw_taskteam_t lwt;
1955 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1956 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001957
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00001958 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00001959 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1960// don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001961#endif
1962
Jonathan Peyton30419822017-05-12 18:01:32 +00001963 // we were called from GNU native code
1964 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965 return FALSE;
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001966 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001967 KMP_ASSERT2(call_context < fork_context_last,
1968 "__kmp_fork_call: unknown fork_context parameter");
1969 }
1970
1971 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1972 KMP_MB();
1973 return FALSE;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00001974 } // if (nthreads == 1)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001975
Jim Cownie5e8470a2013-09-27 10:38:44 +00001976 // GEH: only modify the executing flag in the case when not serialized
1977 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001978 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1979 "curtask=%p, curtask_max_aclevel=%d\n",
1980 parent_team->t.t_active_level, master_th,
1981 master_th->th.th_current_task,
1982 master_th->th.th_current_task->td_icvs.max_active_levels));
1983 // TODO: GEH - cannot do this assertion because root thread not set up as
1984 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00001985 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1986 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987
1988#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001989 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001990#endif /* OMP_40_ENABLED */
1991 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001992 /* Increment our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00001993 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994 }
1995
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001997 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001998 if ((level + 1 < __kmp_nested_nth.used) &&
1999 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2000 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2001 } else {
2002 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00002003 }
2004
2005#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00002006 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002007 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00002008 kmp_proc_bind_t proc_bind_icv =
2009 proc_bind_default; // proc_bind_default means don't update
2010 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2011 proc_bind = proc_bind_false;
2012 } else {
2013 if (proc_bind == proc_bind_default) {
2014 // No proc_bind clause specified; use current proc-bind-var for this
2015 // parallel region
2016 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2017 }
2018 /* else: The proc_bind policy was specified explicitly on parallel clause.
2019 This overrides proc-bind-var for this parallel region, but does not
2020 change proc-bind-var. */
2021 // Figure the value of proc-bind-var for the child threads.
2022 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2023 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2024 master_th->th.th_current_task->td_icvs.proc_bind)) {
2025 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2026 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027 }
2028
Jim Cownie5e8470a2013-09-27 10:38:44 +00002029 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030 master_th->th.th_set_proc_bind = proc_bind_default;
2031#endif /* OMP_40_ENABLED */
2032
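 // Only take a private copy of the ICVs when the nproc or proc-bind value
 // must be overridden for the new team; otherwise the parent task's ICVs
 // are passed to __kmp_allocate_team unchanged.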
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002033 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002035 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002037 ) {
2038 kmp_internal_control_t new_icvs;
2039 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2040 new_icvs.next = NULL;
2041 if (nthreads_icv > 0) {
2042 new_icvs.nproc = nthreads_icv;
2043 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002044
2045#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002046 if (proc_bind_icv != proc_bind_default) {
2047 new_icvs.proc_bind = proc_bind_icv;
2048 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002049#endif /* OMP_40_ENABLED */
2050
Jonathan Peyton30419822017-05-12 18:01:32 +00002051 /* allocate a new parallel team */
2052 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2053 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002054#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002055 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002056#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002057#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002058 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002059#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002060 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002061 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002062 /* allocate a new parallel team */
2063 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2064 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002065#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002066 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002067#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002068#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002069 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002070#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002071 &master_th->th.th_current_task->td_icvs,
2072 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002073 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002074 KF_TRACE(
2075 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002076
2077 /* setup the new team */
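 // KMP_CHECK_UPDATE writes a field only if the value actually changes, which
 // avoids needlessly dirtying cache lines shared with the team's threads.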
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002078 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2079 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2080 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2081 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2082 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002083#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002084 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2085 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002086#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002087 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2088// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002089#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002090 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002092 int new_level = parent_team->t.t_level + 1;
2093 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2094 new_level = parent_team->t.t_active_level + 1;
2095 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096#if OMP_40_ENABLED
2097 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002098 // AC: Do not increase parallel level at start of the teams construct
2099 int new_level = parent_team->t.t_level;
2100 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2101 new_level = parent_team->t.t_active_level;
2102 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002103 }
2104#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002105 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002106 // set master's schedule as new run-time schedule
2107 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002108
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002109#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002110 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002111#endif
Jonathan Peyton92ca6182018-09-07 18:25:49 +00002112#if OMP_50_ENABLED
2113 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2114#endif
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002115
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002116 // Update the floating point rounding in the team if required.
2117 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002118
Jonathan Peyton30419822017-05-12 18:01:32 +00002119 if (__kmp_tasking_mode != tskm_immediate_exec) {
2120 // Set master's task team to team's task team. Unless this is a hot team, it
2121 // should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002122 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2123 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002124 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2125 "%p, new task_team %p / team %p\n",
2126 __kmp_gtid_from_thread(master_th),
2127 master_th->th.th_task_team, parent_team,
2128 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002129
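    // For a nested parallel region (or when the master already has a task
    // team), push the master's task_state onto its memo stack, doubling the
    // stack when it is full; the saved state is popped at the matching join.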
Jonathan Peyton30419822017-05-12 18:01:32 +00002130 if (active_level || master_th->th.th_task_team) {
2131 // Save the master's task_state (restored at the matching join)
2132 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2133 if (master_th->th.th_task_state_top >=
2134 master_th->th.th_task_state_stack_sz) { // increase size
2135 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2136 kmp_uint8 *old_stack, *new_stack;
2137 kmp_uint32 i;
2138 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2139 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2140 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2141 }
2142 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2143 ++i) { // zero-init rest of stack
2144 new_stack[i] = 0;
2145 }
2146 old_stack = master_th->th.th_task_state_memo_stack;
2147 master_th->th.th_task_state_memo_stack = new_stack;
2148 master_th->th.th_task_state_stack_sz = new_size;
2149 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002150 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002151 // Store master's task_state on stack
2152 master_th->th
2153 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2154 master_th->th.th_task_state;
2155 master_th->th.th_task_state_top++;
2156#if KMP_NESTED_HOT_TEAMS
Jonathan Peytonca10a762018-08-24 18:05:00 +00002157 if (master_th->th.th_hot_teams &&
Jonathan Peytonf4c07202018-11-28 20:15:11 +00002158 active_level < __kmp_hot_teams_max_level &&
Jonathan Peytonca10a762018-08-24 18:05:00 +00002159 team == master_th->th.th_hot_teams[active_level].hot_team) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00002160 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002161 master_th->th.th_task_state =
2162 master_th->th
2163 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2164 } else {
2165#endif
2166 master_th->th.th_task_state = 0;
2167#if KMP_NESTED_HOT_TEAMS
2168 }
2169#endif
2170 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002171#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002172 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2173 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002174#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002175 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002176
Jonathan Peyton30419822017-05-12 18:01:32 +00002177 KA_TRACE(
2178 20,
2179 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2180 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2181 team->t.t_nproc));
2182 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2183 (team->t.t_master_tid == 0 &&
2184 (team->t.t_parent == root->r.r_root_team ||
2185 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002186 KMP_MB();
2187
2188 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002189 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002190#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002191 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002192#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002193 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002194// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002195#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002196 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002197#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002198 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002199#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002200 KMP_CHECK_UPDATE(*argv, new_argv);
2201 argv++;
2202 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002203#if OMP_40_ENABLED
2204 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002205 for (i = 0; i < argc; ++i) {
2206 // Get args from parent team for teams construct
2207 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2208 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002209 }
2210#endif /* OMP_40_ENABLED */
2211
2212 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002213 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002214 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002215 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002216
Jonathan Peyton30419822017-05-12 18:01:32 +00002217 __kmp_fork_team_threads(root, team, master_th, gtid);
2218 __kmp_setup_icv_copy(team, nthreads,
2219 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002221#if OMPT_SUPPORT
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002222 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002223#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224
Jonathan Peyton30419822017-05-12 18:01:32 +00002225 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002226
Jim Cownie5e8470a2013-09-27 10:38:44 +00002227#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002228 if (team->t.t_active_level == 1 // only report frames at level 1
2229#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002230 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002231#endif /* OMP_40_ENABLED */
2232 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002233#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002234 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2235 (__kmp_forkjoin_frames_mode == 3 ||
2236 __kmp_forkjoin_frames_mode == 1)) {
2237 kmp_uint64 tmp_time = 0;
2238 if (__itt_get_timestamp_ptr)
2239 tmp_time = __itt_get_timestamp();
2240 // Internal fork - report frame begin
2241 master_th->th.th_frame_time = tmp_time;
2242 if (__kmp_forkjoin_frames_mode == 3)
2243 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002244 } else
2245// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002246#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002247 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2248 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002249 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002250 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2251 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002252 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002253#endif /* USE_ITT_BUILD */
2254
2255 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002256 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002258 KF_TRACE(10,
2259 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2260 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002261
2262#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002263 if (__itt_stack_caller_create_ptr) {
2264 team->t.t_stack_id =
2265 __kmp_itt_stack_caller_create(); // create new stack stitching id
2266 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002267 }
2268#endif /* USE_ITT_BUILD */
2269
2270#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002271 // AC: skip __kmp_internal_fork at teams construct, let only master
2272 // threads execute
2273 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002274#endif /* OMP_40_ENABLED */
2275 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002276 __kmp_internal_fork(loc, gtid, team);
2277 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2278 "master_th=%p, gtid=%d\n",
2279 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002280 }
2281
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002282 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002283 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2284 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002285 }
2286
2287 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002288 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2289 team->t.t_id, team->t.t_pkfn));
2290 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002291
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00002292 if (!team->t.t_invoke(gtid)) {
2293 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jonathan Peyton30419822017-05-12 18:01:32 +00002294 }
2295 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2296 team->t.t_id, team->t.t_pkfn));
2297 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002298
Jonathan Peyton30419822017-05-12 18:01:32 +00002299 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002300
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002301#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002302 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002303 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002304 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002305#endif
2306
Jonathan Peyton30419822017-05-12 18:01:32 +00002307 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002308}
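// Usage sketch (not part of the runtime): a construct such as
//
//   #pragma omp parallel num_threads(4)
//   { work(); }
//
// is typically lowered by the compiler to a __kmpc_fork_call() that funnels
// into __kmp_fork_call() above; the master runs the microtask through
// team->t.t_invoke(gtid) and the region is torn down later in __kmp_join_call().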
2309
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002310#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002311static inline void __kmp_join_restore_state(kmp_info_t *thread,
2312 kmp_team_t *team) {
2313 // restore state outside the region
2314 thread->th.ompt_thread_info.state =
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002315 ((team->t.t_serialized) ? ompt_state_work_serial
2316 : ompt_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002317}
2318
Joachim Protze82e94a52017-11-01 10:08:30 +00002319static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2320 kmp_team_t *team, ompt_data_t *parallel_data,
2321 fork_context_e fork_context, void *codeptr) {
2322 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2323 if (ompt_enabled.ompt_callback_parallel_end) {
2324 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2325 parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
2326 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002327 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002328
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002329 task_info->frame.enter_frame = ompt_data_none;
Jonathan Peyton30419822017-05-12 18:01:32 +00002330 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002331}
2332#endif
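// The two OMPT helpers above back the tool interface on the join path: if a
// tool registered ompt_callback_parallel_end it is dispatched with the region's
// parallel_data, the task frame's enter_frame is cleared, and the master's
// ompt state is reset to work-serial or work-parallel depending on whether the
// surviving team is serialized.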
2333
Jonathan Peyton30419822017-05-12 18:01:32 +00002334void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002335#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002336 ,
2337 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002338#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002340 ,
2341 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002342#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002343 ) {
2344 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2345 kmp_team_t *team;
2346 kmp_team_t *parent_team;
2347 kmp_info_t *master_th;
2348 kmp_root_t *root;
2349 int master_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002350
Jonathan Peyton30419822017-05-12 18:01:32 +00002351 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352
Jonathan Peyton30419822017-05-12 18:01:32 +00002353 /* setup current data */
2354 master_th = __kmp_threads[gtid];
2355 root = master_th->th.th_root;
2356 team = master_th->th.th_team;
2357 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002358
Jonathan Peyton30419822017-05-12 18:01:32 +00002359 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002360
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002361#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002362 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002363 master_th->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002364 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002365#endif
2366
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002367#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002368 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2369 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2370 "th_task_team = %p\n",
2371 __kmp_gtid_from_thread(master_th), team,
2372 team->t.t_task_team[master_th->th.th_task_state],
2373 master_th->th.th_task_team));
2374 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2375 team->t.t_task_team[master_th->th.th_task_state]);
2376 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002377#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002378
Jonathan Peyton30419822017-05-12 18:01:32 +00002379 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002380#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002381 if (master_th->th.th_teams_microtask) {
2382 // We are in teams construct
2383 int level = team->t.t_level;
2384 int tlevel = master_th->th.th_teams_level;
2385 if (level == tlevel) {
2386 // AC: we haven't incremented it earlier at start of teams construct,
2387 // so do it here - at the end of teams construct
2388 team->t.t_level++;
2389 } else if (level == tlevel + 1) {
2390 // AC: we are exiting parallel inside teams, need to increment
2391 // serialization in order to restore it in the next call to
2392 // __kmpc_end_serialized_parallel
2393 team->t.t_serialized++;
2394 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002395 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002396#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002397 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002398
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002399#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002400 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002401 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002402 }
2403#endif
2404
Jonathan Peyton30419822017-05-12 18:01:32 +00002405 return;
2406 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002407
Jonathan Peyton30419822017-05-12 18:01:32 +00002408 master_active = team->t.t_master_active;
2409
2410#if OMP_40_ENABLED
2411 if (!exit_teams)
2412#endif /* OMP_40_ENABLED */
2413 {
2414 // AC: No barrier for internal teams at exit from teams construct.
2415 // But there is barrier for external team (league).
2416 __kmp_internal_join(loc, gtid, team);
2417 }
2418#if OMP_40_ENABLED
2419 else {
2420 master_th->th.th_task_state =
2421 0; // AC: no tasking in teams (out of any parallel)
2422 }
2423#endif /* OMP_40_ENABLED */
2424
2425 KMP_MB();
2426
2427#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002428 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2429 void *codeptr = team->t.ompt_team_info.master_return_address;
Jonathan Peyton30419822017-05-12 18:01:32 +00002430#endif
2431
2432#if USE_ITT_BUILD
2433 if (__itt_stack_caller_create_ptr) {
2434 __kmp_itt_stack_caller_destroy(
2435 (__itt_caller)team->t
2436 .t_stack_id); // destroy the stack stitching id after join barrier
2437 }
2438
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002439 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002440 if (team->t.t_active_level == 1
2441#if OMP_40_ENABLED
2442 && !master_th->th.th_teams_microtask /* not in teams construct */
2443#endif /* OMP_40_ENABLED */
2444 ) {
2445 master_th->th.th_ident = loc;
2446 // only one notification scheme (either "submit" or "forking/joined", not
2447 // both)
2448 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2449 __kmp_forkjoin_frames_mode == 3)
2450 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2451 master_th->th.th_frame_time, 0, loc,
2452 master_th->th.th_team_nproc, 1);
2453 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2454 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2455 __kmp_itt_region_joined(gtid);
2456 } // active_level == 1
2457#endif /* USE_ITT_BUILD */
2458
2459#if OMP_40_ENABLED
2460 if (master_th->th.th_teams_microtask && !exit_teams &&
2461 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2462 team->t.t_level == master_th->th.th_teams_level + 1) {
2463 // AC: We need to leave the team structure intact at the end of parallel
2464 // inside the teams construct, so that at the next parallel same (hot) team
2465 // works, only adjust nesting levels
2466
2467 /* Decrement our nested depth level */
2468 team->t.t_level--;
2469 team->t.t_active_level--;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002470 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002471
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00002472 // Restore number of threads in the team if needed. This code relies on
2473 // the proper adjustment of th_teams_size.nth after the fork in
2474 // __kmp_teams_master on each teams master in the case that
2475 // __kmp_reserve_threads reduced it.
Jonathan Peyton30419822017-05-12 18:01:32 +00002476 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2477 int old_num = master_th->th.th_team_nproc;
2478 int new_num = master_th->th.th_teams_size.nth;
2479 kmp_info_t **other_threads = team->t.t_threads;
2480 team->t.t_nproc = new_num;
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00002481 for (int i = 0; i < old_num; ++i) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002482 other_threads[i]->th.th_team_nproc = new_num;
2483 }
2484 // Adjust states of non-used threads of the team
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00002485 for (int i = old_num; i < new_num; ++i) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002486 // Re-initialize thread's barrier data.
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00002487 KMP_DEBUG_ASSERT(other_threads[i]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002488 kmp_balign_t *balign = other_threads[i]->th.th_bar;
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00002489 for (int b = 0; b < bs_last_barrier; ++b) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002490 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2491 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2492#if USE_DEBUGGER
2493 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2494#endif
2495 }
2496 if (__kmp_tasking_mode != tskm_immediate_exec) {
2497 // Synchronize thread's task state
2498 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2499 }
2500 }
2501 }
2502
2503#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002504 if (ompt_enabled.enabled) {
2505 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2506 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002507 }
2508#endif
2509
2510 return;
2511 }
2512#endif /* OMP_40_ENABLED */
2513
2514 /* do cleanup and restore the parent team */
2515 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2516 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2517
2518 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2519
2520 /* jc: The following lock has instructions with REL and ACQ semantics,
2521 separating the parallel user code called in this parallel region
2522 from the serial user code called after this function returns. */
2523 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2524
2525#if OMP_40_ENABLED
2526 if (!master_th->th.th_teams_microtask ||
2527 team->t.t_level > master_th->th.th_teams_level)
2528#endif /* OMP_40_ENABLED */
2529 {
2530 /* Decrement our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002531 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002532 }
2533 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2534
Joachim Protze82e94a52017-11-01 10:08:30 +00002535#if OMPT_SUPPORT
2536 if (ompt_enabled.enabled) {
2537 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2538 if (ompt_enabled.ompt_callback_implicit_task) {
2539 int ompt_team_size = team->t.t_nproc;
2540 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2541 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
Joachim Protze2b46d302019-01-15 15:36:53 +00002542 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Jonathan Peyton30419822017-05-12 18:01:32 +00002543 }
Joachim Protze82e94a52017-11-01 10:08:30 +00002544
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00002545 task_info->frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00002546 task_info->task_data = ompt_data_none;
Jonathan Peyton30419822017-05-12 18:01:32 +00002547 }
2548#endif
2549
2550 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2551 master_th, team));
2552 __kmp_pop_current_task_from_thread(master_th);
2553
2554#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2555 // Restore master thread's partition.
2556 master_th->th.th_first_place = team->t.t_first_place;
2557 master_th->th.th_last_place = team->t.t_last_place;
2558#endif /* OMP_40_ENABLED */
Jonathan Peyton92ca6182018-09-07 18:25:49 +00002559#if OMP_50_ENABLED
2560 master_th->th.th_def_allocator = team->t.t_def_allocator;
2561#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002562
2563 updateHWFPControl(team);
2564
2565 if (root->r.r_active != master_active)
2566 root->r.r_active = master_active;
2567
2568 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2569 master_th)); // this will free worker threads
2570
2571 /* This race was fun to find. Keep the following inside the critical
2572 region; otherwise assertions may fail occasionally because the old team may
2573 be reallocated and the hierarchy then looks inconsistent. Running outside
2574 the lock is actually safe and causes no bugs, only those assertion failures,
2575 but it is a single deref&assign, so it may as well stay in the critical region. */
2576 master_th->th.th_team = parent_team;
2577 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2578 master_th->th.th_team_master = parent_team->t.t_threads[0];
2579 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2580
2581 /* restore serialized team, if need be */
2582 if (parent_team->t.t_serialized &&
2583 parent_team != master_th->th.th_serial_team &&
2584 parent_team != root->r.r_root_team) {
2585 __kmp_free_team(root,
2586 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2587 master_th->th.th_serial_team = parent_team;
2588 }
2589
2590 if (__kmp_tasking_mode != tskm_immediate_exec) {
2591 if (master_th->th.th_task_state_top >
2592 0) { // Restore task state from memo stack
2593 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2594 // Remember master's state if we re-use this nested hot team
2595 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2596 master_th->th.th_task_state;
2597 --master_th->th.th_task_state_top; // pop
2598 // Now restore state at this level
2599 master_th->th.th_task_state =
2600 master_th->th
2601 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2602 }
2603 // Copy the task team from the parent team to the master thread
2604 master_th->th.th_task_team =
2605 parent_team->t.t_task_team[master_th->th.th_task_state];
2606 KA_TRACE(20,
2607 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2608 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2609 parent_team));
2610 }
2611
2612 // TODO: GEH - cannot do this assertion because root thread not set up as
2613 // executing
2614 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2615 master_th->th.th_current_task->td_flags.executing = 1;
2616
2617 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2618
2619#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002620 if (ompt_enabled.enabled) {
2621 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2622 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002623 }
2624#endif
2625
2626 KMP_MB();
2627 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2628}
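// Join-path summary (descriptive note, no behavior change): after the join
// barrier the master pops its implicit task (__kmp_pop_current_task_from_thread),
// pops its task_state memo and reattaches to the parent team's task team,
// restores hardware FP control via updateHWFPControl, frees the finished team,
// and finally releases __kmp_forkjoin_lock before returning to serial code.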
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629
2630/* Check whether we should push an internal control record onto the
2631 serial team stack. If so, do it. */
Jonathan Peyton30419822017-05-12 18:01:32 +00002632void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002633
Jonathan Peyton30419822017-05-12 18:01:32 +00002634 if (thread->th.th_team != thread->th.th_serial_team) {
2635 return;
2636 }
2637 if (thread->th.th_team->t.t_serialized > 1) {
2638 int push = 0;
2639
2640 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2641 push = 1;
2642 } else {
2643 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2644 thread->th.th_team->t.t_serialized) {
2645 push = 1;
2646 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002647 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002648 if (push) { /* push a record on the serial team's stack */
2649 kmp_internal_control_t *control =
2650 (kmp_internal_control_t *)__kmp_allocate(
2651 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002652
Jonathan Peyton30419822017-05-12 18:01:32 +00002653 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002654
Jonathan Peyton30419822017-05-12 18:01:32 +00002655 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002656
Jonathan Peyton30419822017-05-12 18:01:32 +00002657 control->next = thread->th.th_team->t.t_control_stack_top;
2658 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002659 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002660 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002661}
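// Illustration (hedged): when omp_set_num_threads() or another ICV setter is
// called while the thread is executing on its serial team with
// t_serialized > 1 (i.e. inside nested serialized parallel regions), the
// record pushed by the routine above lets the matching
// __kmpc_end_serialized_parallel restore the previous ICV values on exit.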
2662
2663/* Changes set_nproc */
Jonathan Peyton30419822017-05-12 18:01:32 +00002664void __kmp_set_num_threads(int new_nth, int gtid) {
2665 kmp_info_t *thread;
2666 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002667
Jonathan Peyton30419822017-05-12 18:01:32 +00002668 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2669 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002670
Jonathan Peyton30419822017-05-12 18:01:32 +00002671 if (new_nth < 1)
2672 new_nth = 1;
2673 else if (new_nth > __kmp_max_nth)
2674 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675
Jonathan Peyton30419822017-05-12 18:01:32 +00002676 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2677 thread = __kmp_threads[gtid];
Andrey Churbanov82318c62018-11-14 13:49:41 +00002678 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2679 return; // nothing to do
Jim Cownie5e8470a2013-09-27 10:38:44 +00002680
Jonathan Peyton30419822017-05-12 18:01:32 +00002681 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002682
Jonathan Peyton30419822017-05-12 18:01:32 +00002683 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684
Jonathan Peyton30419822017-05-12 18:01:32 +00002685 // If this omp_set_num_threads() call will cause the hot team size to be
2686 // reduced (in the absence of a num_threads clause), then reduce it now,
2687 // rather than waiting for the next parallel region.
2688 root = thread->th.th_root;
2689 if (__kmp_init_parallel && (!root->r.r_active) &&
2690 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002691#if KMP_NESTED_HOT_TEAMS
2692 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2693#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002694 ) {
2695 kmp_team_t *hot_team = root->r.r_hot_team;
2696 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002697
Jonathan Peyton30419822017-05-12 18:01:32 +00002698 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002699
Jonathan Peyton30419822017-05-12 18:01:32 +00002700 // Release the extra threads we don't need any more.
2701 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2702 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2703 if (__kmp_tasking_mode != tskm_immediate_exec) {
2704 // When decreasing team size, threads no longer in the team should unref
2705 // task team.
2706 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2707 }
2708 __kmp_free_thread(hot_team->t.t_threads[f]);
2709 hot_team->t.t_threads[f] = NULL;
2710 }
2711 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002712#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002713 if (thread->th.th_hot_teams) {
2714 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2715 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2716 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002717#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002718
Jonathan Peyton30419822017-05-12 18:01:32 +00002719 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720
Jonathan Peyton30419822017-05-12 18:01:32 +00002721 // Update the t_nproc field in the threads that are still active.
2722 for (f = 0; f < new_nth; f++) {
2723 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2724 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002726 // Special flag in case omp_set_num_threads() call
2727 hot_team->t.t_size_changed = -1;
2728 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729}
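// Usage sketch (hedged): a user-level call such as
//
//   omp_set_num_threads(4);
//
// reaches this routine through the C/Fortran entry layer. The value is clamped
// to [1, __kmp_max_nth] and stored in the calling thread's nproc ICV; if the
// root is idle and its hot team is larger than the new value, the surplus
// hot-team threads are released right away instead of at the next fork.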
2730
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731/* Changes max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002732void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2733 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002734
Jonathan Peyton30419822017-05-12 18:01:32 +00002735 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2736 "%d = (%d)\n",
2737 gtid, max_active_levels));
2738 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002739
Jonathan Peyton30419822017-05-12 18:01:32 +00002740 // validate max_active_levels
2741 if (max_active_levels < 0) {
2742 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2743 // We ignore this call if the user has specified a negative value.
2744 // The current setting won't be changed. The last valid setting will be
2745 // used. A warning will be issued (if warnings are allowed as controlled by
2746 // the KMP_WARNINGS env var).
2747 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2748 "max_active_levels for thread %d = (%d)\n",
2749 gtid, max_active_levels));
2750 return;
2751 }
2752 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2753 // it's OK, the max_active_levels is within the valid range: [ 0;
2754 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2755 // We allow a zero value. (implementation defined behavior)
2756 } else {
2757 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2758 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2759 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2760 // Current upper limit is MAX_INT. (implementation defined behavior)
2761 // If the input exceeds the upper limit, we correct the input to be the
2762 // upper limit. (implementation defined behavior)
2763 // Actually, the flow should never get here until we use MAX_INT limit.
2764 }
2765 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2766 "max_active_levels for thread %d = (%d)\n",
2767 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002768
Jonathan Peyton30419822017-05-12 18:01:32 +00002769 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002770
Jonathan Peyton30419822017-05-12 18:01:32 +00002771 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002772
Jonathan Peyton30419822017-05-12 18:01:32 +00002773 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002774}
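// Usage sketch: omp_set_max_active_levels(2) lands here. Negative inputs are
// ignored with a warning, values above KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped
// to the limit, and the accepted value is stored in the current task's
// max_active_levels ICV after __kmp_save_internal_controls.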
2775
2776/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002777int __kmp_get_max_active_levels(int gtid) {
2778 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779
Jonathan Peyton30419822017-05-12 18:01:32 +00002780 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2781 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002782
Jonathan Peyton30419822017-05-12 18:01:32 +00002783 thread = __kmp_threads[gtid];
2784 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2785 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2786 "curtask_maxaclevel=%d\n",
2787 gtid, thread->th.th_current_task,
2788 thread->th.th_current_task->td_icvs.max_active_levels));
2789 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002790}
2791
2792/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
Jonathan Peyton30419822017-05-12 18:01:32 +00002793void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2794 kmp_info_t *thread;
2795 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002796
Jonathan Peyton30419822017-05-12 18:01:32 +00002797 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2798 gtid, (int)kind, chunk));
2799 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002800
Jonathan Peyton30419822017-05-12 18:01:32 +00002801 // Check if the kind parameter is valid, correct if needed.
2802 // Valid parameters should fit in one of two intervals - standard or extended:
2803 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2804 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2805 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2806 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2807 // TODO: Hint needs attention in case we change the default schedule.
2808 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2809 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2810 __kmp_msg_null);
2811 kind = kmp_sched_default;
2812 chunk = 0; // ignore chunk value in case of bad kind
2813 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002814
Jonathan Peyton30419822017-05-12 18:01:32 +00002815 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002816
Jonathan Peyton30419822017-05-12 18:01:32 +00002817 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002818
Jonathan Peyton30419822017-05-12 18:01:32 +00002819 if (kind < kmp_sched_upper_std) {
2820 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2821 // differentiate static chunked vs. unchunked: chunk should be invalid to
2822 // indicate unchunked schedule (which is the default)
2823 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002824 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002825 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2826 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002827 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002828 } else {
2829 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2830 // kmp_sched_lower - 2 ];
2831 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2832 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2833 kmp_sched_lower - 2];
2834 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002835 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002836 // ignore parameter chunk for schedule auto
2837 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2838 } else {
2839 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2840 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002841}
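// Usage sketch (hypothetical user code):
//
//   omp_set_schedule(omp_sched_dynamic, 4);  // arrives here as kmp_sched_dynamic
//
// Out-of-range kinds fall back to kmp_sched_default with a warning; for
// kmp_sched_auto, or whenever chunk < 1, the chunk ICV is reset to
// KMP_DEFAULT_CHUNK.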
2842
2843/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002844void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2845 kmp_info_t *thread;
2846 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002847
Jonathan Peyton30419822017-05-12 18:01:32 +00002848 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2849 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002850
Jonathan Peyton30419822017-05-12 18:01:32 +00002851 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002852
Jonathan Peyton30419822017-05-12 18:01:32 +00002853 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002854
Jonathan Peyton30419822017-05-12 18:01:32 +00002855 switch (th_type) {
2856 case kmp_sch_static:
2857 case kmp_sch_static_greedy:
2858 case kmp_sch_static_balanced:
2859 *kind = kmp_sched_static;
2860 *chunk = 0; // chunk was not set, try to show this fact via zero value
2861 return;
2862 case kmp_sch_static_chunked:
2863 *kind = kmp_sched_static;
2864 break;
2865 case kmp_sch_dynamic_chunked:
2866 *kind = kmp_sched_dynamic;
2867 break;
2868 case kmp_sch_guided_chunked:
2869 case kmp_sch_guided_iterative_chunked:
2870 case kmp_sch_guided_analytical_chunked:
2871 *kind = kmp_sched_guided;
2872 break;
2873 case kmp_sch_auto:
2874 *kind = kmp_sched_auto;
2875 break;
2876 case kmp_sch_trapezoidal:
2877 *kind = kmp_sched_trapezoidal;
2878 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002879#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002880 case kmp_sch_static_steal:
2881 *kind = kmp_sched_static_steal;
2882 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002883#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002884 default:
2885 KMP_FATAL(UnknownSchedulingType, th_type);
2886 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002887
Jonathan Peyton30419822017-05-12 18:01:32 +00002888 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002889}
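// Reverse mapping of the setter above: the detailed internal sched_type stored
// in the ICV is folded back to the coarse user-visible kind; for the unchunked
// static variants a chunk of 0 is reported because no chunk was ever set.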
2890
Jonathan Peyton30419822017-05-12 18:01:32 +00002891int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002892
Jonathan Peyton30419822017-05-12 18:01:32 +00002893 int ii, dd;
2894 kmp_team_t *team;
2895 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002896
Jonathan Peyton30419822017-05-12 18:01:32 +00002897 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2898 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002899
Jonathan Peyton30419822017-05-12 18:01:32 +00002900 // validate level
2901 if (level == 0)
2902 return 0;
2903 if (level < 0)
2904 return -1;
2905 thr = __kmp_threads[gtid];
2906 team = thr->th.th_team;
2907 ii = team->t.t_level;
2908 if (level > ii)
2909 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002910
2911#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002912 if (thr->th.th_teams_microtask) {
2913 // AC: we are in teams region where multiple nested teams have same level
2914 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2915 if (level <=
2916 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2917 KMP_DEBUG_ASSERT(ii >= tlevel);
2918 // AC: As we need to pass by the teams league, we need to artificially
2919 // increase ii
2920 if (ii == tlevel) {
2921 ii += 2; // three teams have same level
2922 } else {
2923 ii++; // two teams have same level
2924 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002925 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002926 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002927#endif
2928
Jonathan Peyton30419822017-05-12 18:01:32 +00002929 if (ii == level)
2930 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002931
Jonathan Peyton30419822017-05-12 18:01:32 +00002932 dd = team->t.t_serialized;
2933 level++;
2934 while (ii > level) {
2935 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002936 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002937 if ((team->t.t_serialized) && (!dd)) {
2938 team = team->t.t_parent;
2939 continue;
2940 }
2941 if (ii > level) {
2942 team = team->t.t_parent;
2943 dd = team->t.t_serialized;
2944 ii--;
2945 }
2946 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002947
Jonathan Peyton30419822017-05-12 18:01:32 +00002948 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002949}
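// Example (hedged): with two active nested levels,
//
//   omp_get_ancestor_thread_num(1)               // ancestor tid at level 1
//   omp_get_ancestor_thread_num(omp_get_level()) // == omp_get_thread_num()
//
// both resolve through this routine; serialized levels are walked via
// t_serialized so the result follows the OpenMP level numbering.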
2950
Jonathan Peyton30419822017-05-12 18:01:32 +00002951int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002952
Jonathan Peyton30419822017-05-12 18:01:32 +00002953 int ii, dd;
2954 kmp_team_t *team;
2955 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002956
Jonathan Peyton30419822017-05-12 18:01:32 +00002957 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2958 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002959
Jonathan Peyton30419822017-05-12 18:01:32 +00002960 // validate level
2961 if (level == 0)
2962 return 1;
2963 if (level < 0)
2964 return -1;
2965 thr = __kmp_threads[gtid];
2966 team = thr->th.th_team;
2967 ii = team->t.t_level;
2968 if (level > ii)
2969 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002970
2971#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002972 if (thr->th.th_teams_microtask) {
2973 // AC: we are in teams region where multiple nested teams have same level
2974 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2975 if (level <=
2976 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2977 KMP_DEBUG_ASSERT(ii >= tlevel);
2978 // AC: As we need to pass by the teams league, we need to artificially
2979 // increase ii
2980 if (ii == tlevel) {
2981 ii += 2; // three teams have same level
2982 } else {
2983 ii++; // two teams have same level
2984 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002985 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002986 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002987#endif
2988
Jonathan Peyton30419822017-05-12 18:01:32 +00002989 while (ii > level) {
2990 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002991 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002992 if (team->t.t_serialized && (!dd)) {
2993 team = team->t.t_parent;
2994 continue;
2995 }
2996 if (ii > level) {
2997 team = team->t.t_parent;
2998 ii--;
2999 }
3000 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003001
Jonathan Peyton30419822017-05-12 18:01:32 +00003002 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003003}
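// Companion of the routine above: omp_get_team_size(level) walks the same
// team/serialization chain but returns the ancestor team's t_nproc instead of
// a thread number.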
3004
Jonathan Peyton30419822017-05-12 18:01:32 +00003005kmp_r_sched_t __kmp_get_schedule_global() {
3006 // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and
3007 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
3008 // independently. So one can get the updated schedule here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003009
Jonathan Peyton30419822017-05-12 18:01:32 +00003010 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003011
Jonathan Peyton30419822017-05-12 18:01:32 +00003012 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
3013 // __kmp_guided. __kmp_sched should keep original value, so that user can set
3014 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
3015 // different roots (even in OMP 2.5)
3016 if (__kmp_sched == kmp_sch_static) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003017 // replace STATIC with more detailed schedule (balanced or greedy)
3018 r_sched.r_sched_type = __kmp_static;
Jonathan Peyton30419822017-05-12 18:01:32 +00003019 } else if (__kmp_sched == kmp_sch_guided_chunked) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003020 // replace GUIDED with more detailed schedule (iterative or analytical)
3021 r_sched.r_sched_type = __kmp_guided;
3022 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3023 r_sched.r_sched_type = __kmp_sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003024 }
3025
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003026 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3027 // __kmp_chunk may be wrong here (if it was not ever set)
Jonathan Peyton30419822017-05-12 18:01:32 +00003028 r_sched.chunk = KMP_DEFAULT_CHUNK;
3029 } else {
3030 r_sched.chunk = __kmp_chunk;
3031 }
3032
3033 return r_sched;
3034}
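// Mapping sketch (assuming the usual defaults): with OMP_SCHEDULE unset,
// __kmp_sched stays kmp_sch_static and the returned r_sched_type is the more
// detailed __kmp_static (balanced or greedy) variant; e.g. OMP_SCHEDULE="guided,4"
// would yield { __kmp_guided, 4 } for schedule(runtime) loops.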
3035
3036/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
3037 at least argc number of *t_argv entries for the requested team. */
3038static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3039
3040 KMP_DEBUG_ASSERT(team);
3041 if (!realloc || argc > team->t.t_max_argc) {
3042
3043 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3044 "current entries=%d\n",
3045 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3046 /* if previously allocated heap space for args, free them */
3047 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3048 __kmp_free((void *)team->t.t_argv);
3049
3050 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3051 /* use unused space in the cache line for arguments */
3052 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3053 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3054 "argv entries\n",
3055 team->t.t_id, team->t.t_max_argc));
3056 team->t.t_argv = &team->t.t_inline_argv[0];
3057 if (__kmp_storage_map) {
3058 __kmp_print_storage_map_gtid(
3059 -1, &team->t.t_inline_argv[0],
3060 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3061 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3062 team->t.t_id);
3063 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003064 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003065 /* allocate space for arguments in the heap */
3066 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3067 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3068 : 2 * argc;
3069 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3070 "argv entries\n",
3071 team->t.t_id, team->t.t_max_argc));
3072 team->t.t_argv =
3073 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3074 if (__kmp_storage_map) {
3075 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3076 &team->t.t_argv[team->t.t_max_argc],
3077 sizeof(void *) * team->t.t_max_argc,
3078 "team_%d.t_argv", team->t.t_id);
3079 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003080 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003081 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003082}
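// Sizing note (derived from the code above): up to KMP_INLINE_ARGV_ENTRIES
// microtask arguments are stored inline in the team structure; longer argument
// lists get a heap block of at least KMP_MIN_MALLOC_ARGV_ENTRIES (or 2 * argc)
// pointers, so repeated growth stays amortized.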
3083
Jonathan Peyton30419822017-05-12 18:01:32 +00003084static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3085 int i;
3086 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3087 team->t.t_threads =
3088 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3089 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3090 sizeof(dispatch_shared_info_t) * num_disp_buff);
3091 team->t.t_dispatch =
3092 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3093 team->t.t_implicit_task_taskdata =
3094 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3095 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003096
Jonathan Peyton30419822017-05-12 18:01:32 +00003097 /* setup dispatch buffers */
3098 for (i = 0; i < num_disp_buff; ++i) {
3099 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003100#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003101 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003102#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003103 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003104}
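// Note: each team keeps num_disp_buff dispatch buffers so that consecutive
// nowait loops can rotate buffer_index (and, with OMP 4.5, doacross_buf_idx)
// without waiting for slower threads to leave the previous loop's buffer.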
3105
Jonathan Peyton30419822017-05-12 18:01:32 +00003106static void __kmp_free_team_arrays(kmp_team_t *team) {
3107 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3108 int i;
3109 for (i = 0; i < team->t.t_max_nproc; ++i) {
3110 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3111 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3112 team->t.t_dispatch[i].th_disp_buffer = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003113 }
3114 }
Jonathan Peytonf6399362018-07-09 17:51:13 +00003115#if KMP_USE_HIER_SCHED
3116 __kmp_dispatch_free_hierarchies(team);
3117#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003118 __kmp_free(team->t.t_threads);
3119 __kmp_free(team->t.t_disp_buffer);
3120 __kmp_free(team->t.t_dispatch);
3121 __kmp_free(team->t.t_implicit_task_taskdata);
3122 team->t.t_threads = NULL;
3123 team->t.t_disp_buffer = NULL;
3124 team->t.t_dispatch = NULL;
3125 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003126}
3127
Jonathan Peyton30419822017-05-12 18:01:32 +00003128static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3129 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003130
Jonathan Peyton30419822017-05-12 18:01:32 +00003131 __kmp_free(team->t.t_disp_buffer);
3132 __kmp_free(team->t.t_dispatch);
3133 __kmp_free(team->t.t_implicit_task_taskdata);
3134 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003135
Jonathan Peyton30419822017-05-12 18:01:32 +00003136 KMP_MEMCPY(team->t.t_threads, oldThreads,
3137 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003138
Jonathan Peyton30419822017-05-12 18:01:32 +00003139 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003140}
3141
Jonathan Peyton30419822017-05-12 18:01:32 +00003142static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143
Jonathan Peyton30419822017-05-12 18:01:32 +00003144 kmp_r_sched_t r_sched =
3145 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003146
3147#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003148 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003149#endif /* OMP_40_ENABLED */
3150
Jonathan Peyton30419822017-05-12 18:01:32 +00003151 kmp_internal_control_t g_icvs = {
3152 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
Jonathan Peyton30419822017-05-12 18:01:32 +00003153 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3154 // adjustment of threads (per thread)
3155 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3156 // whether blocktime is explicitly set
3157 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003158#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003159 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3160// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003161#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003162 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3163 // next parallel region (per thread)
3164 // (use a max ub on value if __kmp_parallel_initialize not called yet)
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00003165 __kmp_cg_max_nth, // int thread_limit;
Jonathan Peyton30419822017-05-12 18:01:32 +00003166 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3167 // for max_active_levels
3168 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3169// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003170#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003171 __kmp_nested_proc_bind.bind_types[0],
3172 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003173#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003174 NULL // struct kmp_internal_control *next;
3175 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003176
Jonathan Peyton30419822017-05-12 18:01:32 +00003177 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003178}
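// The initializer above seeds the OpenMP internal control variables (ICVs) for
// a new root: dynamic adjustment, blocktime, default team size, thread limit,
// max active levels, the run-time schedule pair and, with OMP 4.0, the default
// proc-bind policy and default device.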
3179
Jonathan Peyton30419822017-05-12 18:01:32 +00003180static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003181
Jonathan Peyton30419822017-05-12 18:01:32 +00003182 kmp_internal_control_t gx_icvs;
3183 gx_icvs.serial_nesting_level =
3184 0; // probably = team->t.t_serialized, as in __kmp_save_internal_controls
3185 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3186 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003187
Jonathan Peyton30419822017-05-12 18:01:32 +00003188 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003189}
3190
Jonathan Peyton30419822017-05-12 18:01:32 +00003191static void __kmp_initialize_root(kmp_root_t *root) {
3192 int f;
3193 kmp_team_t *root_team;
3194 kmp_team_t *hot_team;
3195 int hot_team_max_nth;
3196 kmp_r_sched_t r_sched =
3197 __kmp_get_schedule_global(); // get current state of scheduling globals
3198 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3199 KMP_DEBUG_ASSERT(root);
3200 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003201
Jonathan Peyton30419822017-05-12 18:01:32 +00003202 /* setup the root state structure */
3203 __kmp_init_lock(&root->r.r_begin_lock);
3204 root->r.r_begin = FALSE;
3205 root->r.r_active = FALSE;
3206 root->r.r_in_parallel = 0;
3207 root->r.r_blocktime = __kmp_dflt_blocktime;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003208
Jonathan Peyton30419822017-05-12 18:01:32 +00003209 /* setup the root team for this task */
3210 /* allocate the root team structure */
3211 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003212
Jonathan Peyton30419822017-05-12 18:01:32 +00003213 root_team =
3214 __kmp_allocate_team(root,
3215 1, // new_nproc
3216 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003217#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003218 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003219#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003220#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003221 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003222#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003223 &r_icvs,
3224 0 // argc
3225 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3226 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003227#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003228 // Non-NULL value should be assigned to make the debugger display the root
3229 // team.
3230 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003231#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003232
Jonathan Peyton30419822017-05-12 18:01:32 +00003233 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003234
Jonathan Peyton30419822017-05-12 18:01:32 +00003235 root->r.r_root_team = root_team;
3236 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003237
Jonathan Peyton30419822017-05-12 18:01:32 +00003238 /* initialize root team */
3239 root_team->t.t_threads[0] = NULL;
3240 root_team->t.t_nproc = 1;
3241 root_team->t.t_serialized = 1;
3242 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003243 root_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003244 KA_TRACE(
3245 20,
3246 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3247 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003248
Jonathan Peyton30419822017-05-12 18:01:32 +00003249 /* setup the hot team for this task */
3250 /* allocate the hot team structure */
3251 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003252
Jonathan Peyton30419822017-05-12 18:01:32 +00003253 hot_team =
3254 __kmp_allocate_team(root,
3255 1, // new_nproc
3256 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003257#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003258 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003259#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003260#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003261 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003262#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003263 &r_icvs,
3264 0 // argc
3265 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3266 );
3267 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268
Jonathan Peyton30419822017-05-12 18:01:32 +00003269 root->r.r_hot_team = hot_team;
3270 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003271
Jonathan Peyton30419822017-05-12 18:01:32 +00003272 /* first-time initialization */
3273 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003274
Jonathan Peyton30419822017-05-12 18:01:32 +00003275 /* initialize hot team */
3276 hot_team_max_nth = hot_team->t.t_max_nproc;
3277 for (f = 0; f < hot_team_max_nth; ++f) {
3278 hot_team->t.t_threads[f] = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003279 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003280 hot_team->t.t_nproc = 1;
3281 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003282 hot_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003283 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003284}
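// --- Editor's note (illustration only, not part of the upstream file): after
// this routine the root owns two teams built from the same ICVs. The root
// team stays serialized (t_nproc == 1, t_serialized == 1) and represents the
// initial thread's sequential region; the hot team is kept around and reused
// for this root's parallel regions (its max_nproc is reserved above as
// __kmp_dflt_team_nth_ub * 2, with worker slots filled in lazily). Roughly:
//
//   root->r.r_root_team  ->  serialized team of one (the initial thread)
//   root->r.r_hot_team   ->  reusable parallel team, workers allocated on demand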
3285
3286#ifdef KMP_DEBUG
3287
Jim Cownie5e8470a2013-09-27 10:38:44 +00003288typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003289 kmp_team_p const *entry;
3290 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003291} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003292typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003293
Jonathan Peyton30419822017-05-12 18:01:32 +00003294static void __kmp_print_structure_team_accum( // Add team to list of teams.
3295 kmp_team_list_t list, // List of teams.
3296 kmp_team_p const *team // Team to add.
3297 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003298
Jonathan Peyton30419822017-05-12 18:01:32 +00003299 // List must terminate with item where both entry and next are NULL.
3300 // Team is added to the list only once.
3301 // List is sorted in ascending order by team id.
3302 // Team id is *not* a key.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003303
Jonathan Peyton30419822017-05-12 18:01:32 +00003304 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003305
Jonathan Peyton30419822017-05-12 18:01:32 +00003306 KMP_DEBUG_ASSERT(list != NULL);
3307 if (team == NULL) {
3308 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003309 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003310
Jonathan Peyton30419822017-05-12 18:01:32 +00003311 __kmp_print_structure_team_accum(list, team->t.t_parent);
3312 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003313
Jonathan Peyton30419822017-05-12 18:01:32 +00003314 // Search list for the team.
3315 l = list;
3316 while (l->next != NULL && l->entry != team) {
3317 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003318 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003319 if (l->next != NULL) {
3320 return; // Team has been added before, exit.
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003321 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003322
Jonathan Peyton30419822017-05-12 18:01:32 +00003323 // Team is not found. Search list again for insertion point.
3324 l = list;
3325 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3326 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003327 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003328
Jonathan Peyton30419822017-05-12 18:01:32 +00003329 // Insert team.
3330 {
3331 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3332 sizeof(kmp_team_list_item_t));
3333 *item = *l;
3334 l->entry = team;
3335 l->next = item;
3336 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003337}
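// --- Editor's sketch (illustration only, not part of kmp_runtime.cpp): the
// accumulator above keeps a sentinel-terminated singly-linked list in
// ascending team-id order and inserts a new entry *before* node 'l' without
// tracking a predecessor: it copies 'l' into a fresh item and then overwrites
// 'l' in place. A minimal stand-alone version of that insertion step, with a
// hypothetical string payload instead of kmp_team_p:
//
//   #include <cstdlib>
//   struct item {
//     const char *name;  // NULL in the terminating sentinel
//     int key;
//     struct item *next; // NULL in the terminating sentinel
//   };
//   static void insert_sorted(item *list, const char *name, int key) {
//     item *l = list;
//     while (l->next != NULL && l->key <= key) // find the insertion point
//       l = l->next;
//     item *copy = (item *)std::malloc(sizeof(item));
//     *copy = *l;     // old node (possibly the sentinel) shifts down one slot
//     l->name = name; // the new entry takes its place, keeping the order
//     l->key = key;
//     l->next = copy;
//   }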
3338
Jonathan Peyton30419822017-05-12 18:01:32 +00003339static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
Jim Cownie5e8470a2013-09-27 10:38:44 +00003340
Jonathan Peyton30419822017-05-12 18:01:32 +00003341 ) {
3342 __kmp_printf("%s", title);
3343 if (team != NULL) {
3344 __kmp_printf("%2x %p\n", team->t.t_id, team);
3345 } else {
3346 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003347 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003348}
3349
Jonathan Peyton30419822017-05-12 18:01:32 +00003350static void __kmp_print_structure_thread(char const *title,
3351 kmp_info_p const *thread) {
3352 __kmp_printf("%s", title);
3353 if (thread != NULL) {
3354 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3355 } else {
3356 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003357 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003358}
3359
Jonathan Peyton30419822017-05-12 18:01:32 +00003360void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003361
Jonathan Peyton30419822017-05-12 18:01:32 +00003362 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003363
Jonathan Peyton30419822017-05-12 18:01:32 +00003364 // Initialize list of teams.
3365 list =
3366 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3367 list->entry = NULL;
3368 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003369
Jonathan Peyton30419822017-05-12 18:01:32 +00003370 __kmp_printf("\n------------------------------\nGlobal Thread "
3371 "Table\n------------------------------\n");
3372 {
3373 int gtid;
3374 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3375 __kmp_printf("%2d", gtid);
3376 if (__kmp_threads != NULL) {
3377 __kmp_printf(" %p", __kmp_threads[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003378 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003379 if (__kmp_root != NULL) {
3380 __kmp_printf(" %p", __kmp_root[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003381 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003382 __kmp_printf("\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003383 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003384 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003385
Jonathan Peyton30419822017-05-12 18:01:32 +00003386 // Print out __kmp_threads array.
3387 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3388 "----------\n");
3389 if (__kmp_threads != NULL) {
3390 int gtid;
3391 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3392 kmp_info_t const *thread = __kmp_threads[gtid];
3393 if (thread != NULL) {
3394 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3395 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3396 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3397 __kmp_print_structure_team(" Serial Team: ",
3398 thread->th.th_serial_team);
3399 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3400 __kmp_print_structure_thread(" Master: ",
3401 thread->th.th_team_master);
3402 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3403 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003404#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003405 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003406#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003407 __kmp_print_structure_thread(" Next in pool: ",
3408 thread->th.th_next_pool);
3409 __kmp_printf("\n");
3410 __kmp_print_structure_team_accum(list, thread->th.th_team);
3411 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003412 }
3413 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003414 } else {
3415 __kmp_printf("Threads array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003416 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003417
Jonathan Peyton30419822017-05-12 18:01:32 +00003418 // Print out __kmp_root array.
3419 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3420 "--------\n");
3421 if (__kmp_root != NULL) {
3422 int gtid;
3423 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3424 kmp_root_t const *root = __kmp_root[gtid];
3425 if (root != NULL) {
3426 __kmp_printf("GTID %2d %p:\n", gtid, root);
3427 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3428 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3429 __kmp_print_structure_thread(" Uber Thread: ",
3430 root->r.r_uber_thread);
3431 __kmp_printf(" Active?: %2d\n", root->r.r_active);
Jonathan Peyton61d44f12018-07-09 18:09:25 +00003432 __kmp_printf(" In Parallel: %2d\n",
3433 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
Jonathan Peyton30419822017-05-12 18:01:32 +00003434 __kmp_printf("\n");
3435 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3436 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003437 }
3438 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003439 } else {
3440 __kmp_printf("Ubers array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003441 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003442
Jonathan Peyton30419822017-05-12 18:01:32 +00003443 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3444 "--------\n");
3445 while (list->next != NULL) {
3446 kmp_team_p const *team = list->entry;
3447 int i;
3448 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3449 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3450 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3451 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3452 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3453 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3454 for (i = 0; i < team->t.t_nproc; ++i) {
3455 __kmp_printf(" Thread %2d: ", i);
3456 __kmp_print_structure_thread("", team->t.t_threads[i]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003457 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003458 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3459 __kmp_printf("\n");
3460 list = list->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003461 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003462
Jonathan Peyton30419822017-05-12 18:01:32 +00003463 // Print out __kmp_thread_pool and __kmp_team_pool.
3464 __kmp_printf("\n------------------------------\nPools\n----------------------"
3465 "--------\n");
3466 __kmp_print_structure_thread("Thread pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003467 CCAST(kmp_info_t *, __kmp_thread_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003468 __kmp_print_structure_team("Team pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003469 CCAST(kmp_team_t *, __kmp_team_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003470 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003471
Jonathan Peyton30419822017-05-12 18:01:32 +00003472 // Free team list.
3473 while (list != NULL) {
3474 kmp_team_list_item_t *item = list;
3475 list = list->next;
3476 KMP_INTERNAL_FREE(item);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003477 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003478}
3479
3480#endif
3481
Jim Cownie5e8470a2013-09-27 10:38:44 +00003482//---------------------------------------------------------------------------
3483// Stuff for per-thread fast random number generator
3484// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003485static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003486 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3487 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3488 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3489 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3490 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3491 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3492 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3493 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3494 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3495 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3496 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003497
3498//---------------------------------------------------------------------------
3499// __kmp_get_random: Get a random number using a linear congruential method.
Jonathan Peyton30419822017-05-12 18:01:32 +00003500unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003501 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003502 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003503
Jonathan Peyton30419822017-05-12 18:01:32 +00003504 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003505
3506 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003507 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003508
3509 return r;
3510}
3511//--------------------------------------------------------
3512// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003513void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003514 unsigned seed = thread->th.th_info.ds.ds_tid;
3515
Jonathan Peyton30419822017-05-12 18:01:32 +00003516 thread->th.th_a =
3517 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3518 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3519 KA_TRACE(30,
3520 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003521}
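// --- Editor's sketch (illustration only, not part of kmp_runtime.cpp): the two
// routines above form a per-thread multiplicative LCG. Each thread gets its own
// multiplier 'a' (chosen from __kmp_primes by its tid) and state 'x'; every
// call advances x = x * a + 1 and returns only the high 16 bits, since the
// low-order bits of such a generator have short periods. A self-contained
// equivalent with a hypothetical three-entry primes table:
//
//   #include <cstdio>
//   static const unsigned primes[] = {0x9e3779b1, 0xffe6cc59, 0x2109f6dd};
//   struct fast_rand {
//     unsigned a, x;
//     explicit fast_rand(unsigned seed)
//         : a(primes[seed % (sizeof(primes) / sizeof(primes[0]))]),
//           x((seed + 1) * a + 1) {}
//     unsigned short next() {
//       unsigned short r = (unsigned short)(x >> 16); // high bits only
//       x = x * a + 1;                                // advance the LCG state
//       return r;
//     }
//   };
//   int main() {
//     fast_rand rng(/*tid=*/0);
//     for (int i = 0; i < 4; ++i)
//       std::printf("%u\n", rng.next());
//     return 0;
//   }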
3522
Jim Cownie5e8470a2013-09-27 10:38:44 +00003523#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003524/* reclaim array entries for root threads that are already dead, returns number
3525 * reclaimed */
3526static int __kmp_reclaim_dead_roots(void) {
3527 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003528
Jonathan Peyton30419822017-05-12 18:01:32 +00003529 for (i = 0; i < __kmp_threads_capacity; ++i) {
3530 if (KMP_UBER_GTID(i) &&
3531 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3532 !__kmp_root[i]
3533 ->r.r_active) { // AC: reclaim only roots that died in a non-active state
3534 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003535 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003536 }
3537 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003538}
3539#endif
3540
Jonathan Peyton30419822017-05-12 18:01:32 +00003541/* This function attempts to create free entries in __kmp_threads and
3542 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003543
Jonathan Peyton30419822017-05-12 18:01:32 +00003544 For Windows* OS static library, the first mechanism used is to reclaim array
3545 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003546
Jonathan Peyton30419822017-05-12 18:01:32 +00003547 On all platforms, expansion is attempted on the arrays __kmp_threads_ and
3548 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3549 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3550 threadprivate cache array has been created. Synchronization with
3551 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003552
Jonathan Peyton30419822017-05-12 18:01:32 +00003553 After any dead root reclamation, if the clipping value allows array expansion
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003554 to result in the generation of a total of nNeed free slots, the function does
3555 that expansion. If not, nothing is done beyond the possible initial root
3556 thread reclamation.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003557
Jonathan Peyton30419822017-05-12 18:01:32 +00003558 If any argument is negative, the behavior is undefined. */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003559static int __kmp_expand_threads(int nNeed) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003560 int added = 0;
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003561 int minimumRequiredCapacity;
3562 int newCapacity;
3563 kmp_info_t **newThreads;
3564 kmp_root_t **newRoot;
3565
3566// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3567// resizing __kmp_threads does not need additional protection if foreign
3568// threads are present
Jim Cownie5e8470a2013-09-27 10:38:44 +00003569
Jonathan Peyton8b3842f2018-10-05 17:59:39 +00003570#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003571 /* only for Windows static library */
3572 /* reclaim array entries for root threads that are already dead */
3573 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003574
Jonathan Peyton30419822017-05-12 18:01:32 +00003575 if (nNeed) {
3576 nNeed -= added;
3577 if (nNeed < 0)
3578 nNeed = 0;
3579 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003580#endif
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003581 if (nNeed <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003582 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003583
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003584 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3585 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3586 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3587 // > __kmp_max_nth in one of two ways:
3588 //
3589 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3590 // may not be resused by another thread, so we may need to increase
3591 // __kmp_threads_capacity to __kmp_max_nth + 1.
3592 //
3593 // 2) New foreign root(s) are encountered. We always register new foreign
3594 // roots. This may cause a smaller # of threads to be allocated at
3595 // subsequent parallel regions, but the worker threads hang around (and
3596 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3597 //
3598 // Anyway, that is the reason for moving the check to see if
3599 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3600 // instead of having it performed here. -BB
Jonathan Peyton30419822017-05-12 18:01:32 +00003601
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003602 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
Jonathan Peyton30419822017-05-12 18:01:32 +00003603
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003604 /* compute expansion headroom to check if we can expand */
3605 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3606 /* possible expansion too small -- give up */
3607 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003608 }
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003609 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3610
3611 newCapacity = __kmp_threads_capacity;
3612 do {
3613 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3614 : __kmp_sys_max_nth;
3615 } while (newCapacity < minimumRequiredCapacity);
3616 newThreads = (kmp_info_t **)__kmp_allocate(
3617 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3618 newRoot =
3619 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3620 KMP_MEMCPY(newThreads, __kmp_threads,
3621 __kmp_threads_capacity * sizeof(kmp_info_t *));
3622 KMP_MEMCPY(newRoot, __kmp_root,
3623 __kmp_threads_capacity * sizeof(kmp_root_t *));
3624
3625 kmp_info_t **temp_threads = __kmp_threads;
3626 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3627 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3628 __kmp_free(temp_threads);
3629 added += newCapacity - __kmp_threads_capacity;
3630 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3631
3632 if (newCapacity > __kmp_tp_capacity) {
3633 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3634 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3635 __kmp_threadprivate_resize_cache(newCapacity);
3636 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3637 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3638 }
3639 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3640 }
3641
Jonathan Peyton30419822017-05-12 18:01:32 +00003642 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003643}
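// --- Editor's worked example (illustration only): with hypothetical values
// __kmp_threads_capacity == 32, __kmp_sys_max_nth == 1024 and nNeed == 5, the
// required capacity is 37 and the doubling loop above settles on 64; the two
// pointer arrays are then carved out of a single allocation, threads first and
// roots immediately after. A stand-alone rendering of the growth rule:
//
//   int cap = 32, need = 5, sys_max = 1024;
//   int required = cap + need; // 37
//   int newcap = cap;
//   do {
//     newcap = newcap <= (sys_max >> 1) ? (newcap << 1) : sys_max;
//   } while (newcap < required); // newcap == 64, clipped so it never exceeds sys_max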
3644
Jonathan Peyton30419822017-05-12 18:01:32 +00003645/* Register the current thread as a root thread and obtain our gtid. We must
3646 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3647 thread that calls from __kmp_do_serial_initialize() */
3648int __kmp_register_root(int initial_thread) {
3649 kmp_info_t *root_thread;
3650 kmp_root_t *root;
3651 int gtid;
3652 int capacity;
3653 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3654 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3655 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003656
Jonathan Peyton30419822017-05-12 18:01:32 +00003657 /* 2007-03-02:
3658 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3659 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3660 work as expected -- it may return false (that means there is at least one
3661 empty slot in __kmp_threads array), but it is possible the only free slot
3662 is #0, which is reserved for initial thread and so cannot be used for this
3663 one. The following code works around this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003664
Jonathan Peyton30419822017-05-12 18:01:32 +00003665 However, the right solution seems to be not reserving slot #0 for the initial
3666 thread because:
3667 (1) there is no magic in slot #0,
3668 (2) we cannot detect initial thread reliably (the first thread which does
3669 serial initialization may not be a real initial thread).
3670 */
3671 capacity = __kmp_threads_capacity;
3672 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3673 --capacity;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003674 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003675
Jonathan Peyton30419822017-05-12 18:01:32 +00003676 /* see if there are too many threads */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003677 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003678 if (__kmp_tp_cached) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003679 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3680 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3681 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00003682 } else {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003683 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3684 __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685 }
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003686 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003687
3688 /* find an available thread slot */
3689 /* Don't reassign the zero slot since we need that to only be used by initial
3690 thread */
3691 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3692 gtid++)
3693 ;
3694 KA_TRACE(1,
3695 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3696 KMP_ASSERT(gtid < __kmp_threads_capacity);
3697
3698 /* update global accounting */
3699 __kmp_all_nth++;
3700 TCW_4(__kmp_nth, __kmp_nth + 1);
3701
3702 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3703 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3704 if (__kmp_adjust_gtid_mode) {
3705 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3706 if (TCR_4(__kmp_gtid_mode) != 2) {
3707 TCW_4(__kmp_gtid_mode, 2);
3708 }
3709 } else {
3710 if (TCR_4(__kmp_gtid_mode) != 1) {
3711 TCW_4(__kmp_gtid_mode, 1);
3712 }
3713 }
3714 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003715
3716#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003717 /* Adjust blocktime to zero if necessary */
3718 /* Middle initialization might not have occurred yet */
3719 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3720 if (__kmp_nth > __kmp_avail_proc) {
3721 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003722 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003723 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003724#endif /* KMP_ADJUST_BLOCKTIME */
3725
Jonathan Peyton30419822017-05-12 18:01:32 +00003726 /* setup this new hierarchy */
3727 if (!(root = __kmp_root[gtid])) {
3728 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3729 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3730 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003731
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003732#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003733 // Initialize stats as soon as possible (right after gtid assignment).
3734 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00003735 __kmp_stats_thread_ptr->startLife();
Jonathan Peyton30419822017-05-12 18:01:32 +00003736 KMP_SET_THREAD_STATE(SERIAL_REGION);
3737 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003738#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003739 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003740
Jonathan Peyton30419822017-05-12 18:01:32 +00003741 /* setup new root thread structure */
3742 if (root->r.r_uber_thread) {
3743 root_thread = root->r.r_uber_thread;
3744 } else {
3745 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3746 if (__kmp_storage_map) {
3747 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003748 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003749 root_thread->th.th_info.ds.ds_gtid = gtid;
Joachim Protze82e94a52017-11-01 10:08:30 +00003750#if OMPT_SUPPORT
Jonathan Peyton3574f282018-10-04 14:57:04 +00003751 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00003752#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003753 root_thread->th.th_root = root;
3754 if (__kmp_env_consistency_check) {
3755 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3756 }
3757#if USE_FAST_MEMORY
3758 __kmp_initialize_fast_memory(root_thread);
3759#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003760
Jonathan Peyton30419822017-05-12 18:01:32 +00003761#if KMP_USE_BGET
3762 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3763 __kmp_initialize_bget(root_thread);
3764#endif
3765 __kmp_init_random(root_thread); // Initialize random number generator
3766 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003767
Jonathan Peyton30419822017-05-12 18:01:32 +00003768 /* setup the serial team held in reserve by the root thread */
3769 if (!root_thread->th.th_serial_team) {
3770 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3771 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3772 root_thread->th.th_serial_team =
3773 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003774#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003775 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003776#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003777#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003778 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003779#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003780 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3781 }
3782 KMP_ASSERT(root_thread->th.th_serial_team);
3783 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3784 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003785
Jonathan Peyton30419822017-05-12 18:01:32 +00003786 /* drop root_thread into place */
3787 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003788
Jonathan Peyton30419822017-05-12 18:01:32 +00003789 root->r.r_root_team->t.t_threads[0] = root_thread;
3790 root->r.r_hot_team->t.t_threads[0] = root_thread;
3791 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3792 // AC: the team created in reserve, not for execution (it is unused for now).
3793 root_thread->th.th_serial_team->t.t_serialized = 0;
3794 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003795
Jonathan Peyton30419822017-05-12 18:01:32 +00003796 /* initialize the thread, get it ready to go */
3797 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3798 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003799
Jonathan Peyton30419822017-05-12 18:01:32 +00003800 /* prepare the master thread for get_gtid() */
3801 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003802
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003803#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003804 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003805#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003806
Jonathan Peyton30419822017-05-12 18:01:32 +00003807#ifdef KMP_TDATA_GTID
3808 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003809#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003810 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3811 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3812
3813 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3814 "plain=%u\n",
3815 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3816 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3817 KMP_INIT_BARRIER_STATE));
3818 { // Initialize barrier data.
3819 int b;
3820 for (b = 0; b < bs_last_barrier; ++b) {
3821 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3822#if USE_DEBUGGER
3823 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3824#endif
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003825 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003826 }
3827 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3828 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003829
Alp Toker763b9392014-02-28 09:42:41 +00003830#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003831#if OMP_40_ENABLED
3832 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3833 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3834 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3835 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3836#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003837 if (TCR_4(__kmp_init_middle)) {
3838 __kmp_affinity_set_init_mask(gtid, TRUE);
3839 }
Alp Toker763b9392014-02-28 09:42:41 +00003840#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton92ca6182018-09-07 18:25:49 +00003841#if OMP_50_ENABLED
3842 root_thread->th.th_def_allocator = __kmp_def_allocator;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00003843 root_thread->th.th_prev_level = 0;
3844 root_thread->th.th_prev_num_threads = 1;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00003845#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003846
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00003847 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3848 tmp->cg_root = root_thread;
3849 tmp->cg_thread_limit = __kmp_cg_max_nth;
3850 tmp->cg_nthreads = 1;
3851 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
3852 " cg_nthreads init to 1\n",
3853 root_thread, tmp));
3854 tmp->up = NULL;
3855 root_thread->th.th_cg_roots = tmp;
3856
Jonathan Peyton30419822017-05-12 18:01:32 +00003857 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003858
Joachim Protze82e94a52017-11-01 10:08:30 +00003859#if OMPT_SUPPORT
3860 if (!initial_thread && ompt_enabled.enabled) {
3861
Joachim Protze489cdb72018-09-10 14:34:54 +00003862 kmp_info_t *root_thread = ompt_get_thread();
Joachim Protze82e94a52017-11-01 10:08:30 +00003863
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003864 ompt_set_thread_state(root_thread, ompt_state_overhead);
Joachim Protze82e94a52017-11-01 10:08:30 +00003865
3866 if (ompt_enabled.ompt_callback_thread_begin) {
3867 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3868 ompt_thread_initial, __ompt_get_thread_data_internal());
3869 }
3870 ompt_data_t *task_data;
3871 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3872 if (ompt_enabled.ompt_callback_task_create) {
3873 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3874 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3875 // initial task has nothing to return to
3876 }
3877
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00003878 ompt_set_thread_state(root_thread, ompt_state_work_serial);
Joachim Protze82e94a52017-11-01 10:08:30 +00003879 }
3880#endif
3881
Jonathan Peyton30419822017-05-12 18:01:32 +00003882 KMP_MB();
3883 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003884
Jonathan Peyton30419822017-05-12 18:01:32 +00003885 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003886}
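// --- Editor's sketch (illustration only, not part of kmp_runtime.cpp): the
// registration path above boils down to "find the first free gtid slot (slot 0
// is reserved for the initial thread), growing the arrays first if none is
// free". A minimal stand-alone version of that search, with a hypothetical
// fixed-size table standing in for __kmp_threads:
//
//   #include <cstddef>
//   static void *slots[16];
//   static int find_free_gtid(bool initial_thread) {
//     int gtid = initial_thread ? 0 : 1; // never hand slot 0 to a foreign root
//     while (slots[gtid] != NULL)
//       ++gtid; // caller has already ensured that a free slot exists
//     return gtid;
//   }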
3887
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003888#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003889static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3890 const int max_level) {
3891 int i, n, nth;
3892 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3893 if (!hot_teams || !hot_teams[level].hot_team) {
3894 return 0;
3895 }
3896 KMP_DEBUG_ASSERT(level < max_level);
3897 kmp_team_t *team = hot_teams[level].hot_team;
3898 nth = hot_teams[level].hot_team_nth;
3899 n = nth - 1; // master is not freed
3900 if (level < max_level - 1) {
3901 for (i = 0; i < nth; ++i) {
3902 kmp_info_t *th = team->t.t_threads[i];
3903 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3904 if (i > 0 && th->th.th_hot_teams) {
3905 __kmp_free(th->th.th_hot_teams);
3906 th->th.th_hot_teams = NULL;
3907 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003908 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003909 }
3910 __kmp_free_team(root, team, NULL);
3911 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003912}
3913#endif
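// --- Editor's note (illustration only): the recursive free above returns the
// number of __kmp_threads entries released. For a hypothetical two-level nest
// (max_level == 2) in which the outer hot team has 4 threads and each of those
// threads owns a level-1 hot team of 3, the outer call counts nth - 1 == 3
// workers (the master is never freed), and each of the 4 recursive calls adds
// nth - 1 == 2 more, so the total returned is 3 + 4 * 2 == 11.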
3914
Jonathan Peyton30419822017-05-12 18:01:32 +00003915// Resets a root thread and clear its root and hot teams.
3916// Returns the number of __kmp_threads entries directly and indirectly freed.
3917static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3918 kmp_team_t *root_team = root->r.r_root_team;
3919 kmp_team_t *hot_team = root->r.r_hot_team;
3920 int n = hot_team->t.t_nproc;
3921 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003922
Jonathan Peyton30419822017-05-12 18:01:32 +00003923 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003924
Jonathan Peyton30419822017-05-12 18:01:32 +00003925 root->r.r_root_team = NULL;
3926 root->r.r_hot_team = NULL;
3927 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3928 // before call to __kmp_free_team().
3929 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003930#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003931 if (__kmp_hot_teams_max_level >
3932 0) { // need to free nested hot teams and their threads if any
3933 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3934 kmp_info_t *th = hot_team->t.t_threads[i];
3935 if (__kmp_hot_teams_max_level > 1) {
3936 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3937 }
3938 if (th->th.th_hot_teams) {
3939 __kmp_free(th->th.th_hot_teams);
3940 th->th.th_hot_teams = NULL;
3941 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003942 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003943 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003944#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003945 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003946
Jonathan Peyton30419822017-05-12 18:01:32 +00003947 // Before we can reap the thread, we need to make certain that all other
3948 // threads in the teams that had this root as ancestor have stopped trying to
3949 // steal tasks.
3950 if (__kmp_tasking_mode != tskm_immediate_exec) {
3951 __kmp_wait_to_unref_task_teams();
3952 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953
Jonathan Peyton30419822017-05-12 18:01:32 +00003954#if KMP_OS_WINDOWS
3955 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3956 KA_TRACE(
3957 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3958 "\n",
3959 (LPVOID) & (root->r.r_uber_thread->th),
3960 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3961 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3962#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003963
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003964#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003965 if (ompt_enabled.ompt_callback_thread_end) {
3966 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3967 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
Jonathan Peyton30419822017-05-12 18:01:32 +00003968 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003969#endif
3970
Jonathan Peyton30419822017-05-12 18:01:32 +00003971 TCW_4(__kmp_nth,
3972 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00003973 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
3974 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
3975 " to %d\n",
3976 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
3977 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
Jonathan Peytonf4392462017-07-27 20:58:41 +00003978
Jonathan Peyton30419822017-05-12 18:01:32 +00003979 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003980
Jonathan Peyton30419822017-05-12 18:01:32 +00003981 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
3982 // instead of freeing it.
3983 root->r.r_uber_thread = NULL;
3984 /* mark root as no longer in use */
3985 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003986
Jonathan Peyton30419822017-05-12 18:01:32 +00003987 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003988}
3989
Jonathan Peyton30419822017-05-12 18:01:32 +00003990void __kmp_unregister_root_current_thread(int gtid) {
3991 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3992 /* this lock should be ok, since unregister_root_current_thread is never
3993 called during an abort, only during a normal close. furthermore, if you
3994 have the forkjoin lock, you should never try to get the initz lock */
3995 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3996 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3997 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
3998 "exiting T#%d\n",
3999 gtid));
4000 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4001 return;
4002 }
4003 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00004004
Jonathan Peyton30419822017-05-12 18:01:32 +00004005 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4006 KMP_ASSERT(KMP_UBER_GTID(gtid));
4007 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4008 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004009
Jonathan Peyton30419822017-05-12 18:01:32 +00004010 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004011
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004012#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004013 kmp_info_t *thread = __kmp_threads[gtid];
4014 kmp_team_t *team = thread->th.th_team;
4015 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00004016
Jonathan Peyton30419822017-05-12 18:01:32 +00004017 // we need to wait for the proxy tasks before finishing the thread
4018 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00004019#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00004020 // the runtime is shutting down so we won't report any events
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00004021 thread->th.ompt_thread_info.state = ompt_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00004022#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004023 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4024 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00004025#endif
4026
Jonathan Peyton30419822017-05-12 18:01:32 +00004027 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004028
Jonathan Peyton30419822017-05-12 18:01:32 +00004029 /* free up this thread slot */
4030 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004031#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00004032 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004033#endif
4034
Jonathan Peyton30419822017-05-12 18:01:32 +00004035 KMP_MB();
4036 KC_TRACE(10,
4037 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004038
Jonathan Peyton30419822017-05-12 18:01:32 +00004039 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004040}
4041
Jonathan Peyton2321d572015-06-08 19:25:25 +00004042#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00004043/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00004044 Unregisters a root thread that is not the current thread. Returns the number
4045 of __kmp_threads entries freed as a result. */
4046static int __kmp_unregister_root_other_thread(int gtid) {
4047 kmp_root_t *root = __kmp_root[gtid];
4048 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004049
Jonathan Peyton30419822017-05-12 18:01:32 +00004050 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4051 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4052 KMP_ASSERT(KMP_UBER_GTID(gtid));
4053 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4054 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004055
Jonathan Peyton30419822017-05-12 18:01:32 +00004056 r = __kmp_reset_root(gtid, root);
4057 KC_TRACE(10,
4058 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4059 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004060}
Jonathan Peyton2321d572015-06-08 19:25:25 +00004061#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004062
Jim Cownie5e8470a2013-09-27 10:38:44 +00004063#if KMP_DEBUG
4064void __kmp_task_info() {
4065
Jonathan Peyton30419822017-05-12 18:01:32 +00004066 kmp_int32 gtid = __kmp_entry_gtid();
4067 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4068 kmp_info_t *this_thr = __kmp_threads[gtid];
4069 kmp_team_t *steam = this_thr->th.th_serial_team;
4070 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004071
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00004072 __kmp_printf(
4073 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4074 "ptask=%p\n",
4075 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4076 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004077}
4078#endif // KMP_DEBUG
4079
Jonathan Peyton30419822017-05-12 18:01:32 +00004080/* TODO optimize with one big memclr, take out what isn't needed, split
4081 responsibility to workers as much as possible, and delay initialization of
4082 features as much as possible */
4083static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4084 int tid, int gtid) {
4085 /* this_thr->th.th_info.ds.ds_gtid is set up in
4086 kmp_allocate_thread/create_worker.
4087 this_thr->th.th_serial_team is set up in __kmp_allocate_thread */
4088 kmp_info_t *master = team->t.t_threads[0];
4089 KMP_DEBUG_ASSERT(this_thr != NULL);
4090 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4091 KMP_DEBUG_ASSERT(team);
4092 KMP_DEBUG_ASSERT(team->t.t_threads);
4093 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4094 KMP_DEBUG_ASSERT(master);
4095 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004096
Jonathan Peyton30419822017-05-12 18:01:32 +00004097 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004098
Jonathan Peyton30419822017-05-12 18:01:32 +00004099 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004100
Jonathan Peyton30419822017-05-12 18:01:32 +00004101 this_thr->th.th_info.ds.ds_tid = tid;
4102 this_thr->th.th_set_nproc = 0;
4103 if (__kmp_tasking_mode != tskm_immediate_exec)
4104 // When tasking is possible, threads are not safe to reap until they are
4105 // done tasking; this will be set when tasking code is exited in wait
4106 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4107 else // no tasking --> always safe to reap
4108 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004109#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004110 this_thr->th.th_set_proc_bind = proc_bind_default;
4111#if KMP_AFFINITY_SUPPORTED
4112 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004113#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004114#endif
4115 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004116
Jonathan Peyton30419822017-05-12 18:01:32 +00004117 /* setup the thread's cache of the team structure */
4118 this_thr->th.th_team_nproc = team->t.t_nproc;
4119 this_thr->th.th_team_master = master;
4120 this_thr->th.th_team_serialized = team->t.t_serialized;
4121 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122
Jonathan Peyton30419822017-05-12 18:01:32 +00004123 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004124
Jonathan Peyton30419822017-05-12 18:01:32 +00004125 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4126 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004127
Jonathan Peyton30419822017-05-12 18:01:32 +00004128 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4129 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004130
Jonathan Peyton30419822017-05-12 18:01:32 +00004131 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4132 tid, gtid, this_thr, this_thr->th.th_current_task));
4133 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4134 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004135
Jonathan Peyton30419822017-05-12 18:01:32 +00004136 /* TODO no worksharing in speculative threads */
4137 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004138
Jonathan Peyton30419822017-05-12 18:01:32 +00004139 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004140
Jonathan Peyton30419822017-05-12 18:01:32 +00004141 if (!this_thr->th.th_pri_common) {
4142 this_thr->th.th_pri_common =
4143 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4144 if (__kmp_storage_map) {
4145 __kmp_print_storage_map_gtid(
4146 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4147 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004148 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004149 this_thr->th.th_pri_head = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004150 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004151
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00004152 if (this_thr != master && // Master's CG root is initialized elsewhere
4153 this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
4154 // Make new thread's CG root same as master's
4155 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4156 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4157 // Increment new thread's CG root's counter to add the new thread
4158 this_thr->th.th_cg_roots->cg_nthreads++;
4159 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4160 " node %p of thread %p to %d\n",
4161 this_thr, this_thr->th.th_cg_roots,
4162 this_thr->th.th_cg_roots->cg_root,
4163 this_thr->th.th_cg_roots->cg_nthreads));
4164 this_thr->th.th_current_task->td_icvs.thread_limit =
4165 this_thr->th.th_cg_roots->cg_thread_limit;
4166 }
4167
Jonathan Peyton30419822017-05-12 18:01:32 +00004168 /* Initialize dynamic dispatch */
4169 {
4170 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4171 // Use team max_nproc since this will never change for the team.
4172 size_t disp_size =
4173 sizeof(dispatch_private_info_t) *
4174 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4175 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4176 team->t.t_max_nproc));
4177 KMP_ASSERT(dispatch);
4178 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4179 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004180
Jonathan Peyton30419822017-05-12 18:01:32 +00004181 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004182#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004183 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004184#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004185 if (!dispatch->th_disp_buffer) {
4186 dispatch->th_disp_buffer =
4187 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004188
Jonathan Peyton30419822017-05-12 18:01:32 +00004189 if (__kmp_storage_map) {
4190 __kmp_print_storage_map_gtid(
4191 gtid, &dispatch->th_disp_buffer[0],
4192 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4193 ? 1
4194 : __kmp_dispatch_num_buffers],
4195 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4196 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4197 gtid, team->t.t_id, gtid);
4198 }
4199 } else {
4200 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004201 }
4202
Jonathan Peyton30419822017-05-12 18:01:32 +00004203 dispatch->th_dispatch_pr_current = 0;
4204 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004205
Jonathan Peyton30419822017-05-12 18:01:32 +00004206 dispatch->th_deo_fcn = 0; /* ORDERED */
4207 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4208 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004209
Jonathan Peyton30419822017-05-12 18:01:32 +00004210 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004211
Jonathan Peyton30419822017-05-12 18:01:32 +00004212 if (!this_thr->th.th_task_state_memo_stack) {
4213 size_t i;
4214 this_thr->th.th_task_state_memo_stack =
4215 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4216 this_thr->th.th_task_state_top = 0;
4217 this_thr->th.th_task_state_stack_sz = 4;
4218 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4219 ++i) // zero init the stack
4220 this_thr->th.th_task_state_memo_stack[i] = 0;
4221 }
4222
4223 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4224 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4225
4226 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004227}
4228
Jonathan Peyton30419822017-05-12 18:01:32 +00004229/* allocate a new thread for the requesting team. this is only called from
4230 within a forkjoin critical section. we will first try to get an available
4231 thread from the thread pool. if none is available, we will fork a new one
4232 assuming we are able to create a new one. this should be assured, as the
4233 caller should check on this first. */
4234kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4235 int new_tid) {
4236 kmp_team_t *serial_team;
4237 kmp_info_t *new_thr;
4238 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004239
Jonathan Peyton30419822017-05-12 18:01:32 +00004240 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4241 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004242#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004243 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004244#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004245 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004246
Jonathan Peyton30419822017-05-12 18:01:32 +00004247 /* first, try to get one from the thread pool */
4248 if (__kmp_thread_pool) {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004249 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00004250 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4251 if (new_thr == __kmp_thread_pool_insert_pt) {
4252 __kmp_thread_pool_insert_pt = NULL;
4253 }
4254 TCW_4(new_thr->th.th_in_pool, FALSE);
4255 // Don't touch th_active_in_pool or th_active.
4256 // The worker thread adjusts those flags as it sleeps/awakens.
4257 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004258
Jonathan Peyton30419822017-05-12 18:01:32 +00004259 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4260 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4261 KMP_ASSERT(!new_thr->th.th_team);
4262 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4263 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004264
Jonathan Peyton30419822017-05-12 18:01:32 +00004265 /* setup the thread structure */
4266 __kmp_initialize_info(new_thr, team, new_tid,
4267 new_thr->th.th_info.ds.ds_gtid);
4268 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004269
Jonathan Peyton30419822017-05-12 18:01:32 +00004270 TCW_4(__kmp_nth, __kmp_nth + 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004271
Jonathan Peyton30419822017-05-12 18:01:32 +00004272 new_thr->th.th_task_state = 0;
4273 new_thr->th.th_task_state_top = 0;
4274 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004275
Jim Cownie5e8470a2013-09-27 10:38:44 +00004276#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004277 /* Adjust blocktime back to zero if necessary */
4278 /* Middle initialization might not have occurred yet */
4279 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4280 if (__kmp_nth > __kmp_avail_proc) {
4281 __kmp_zero_bt = TRUE;
4282 }
4283 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004284#endif /* KMP_ADJUST_BLOCKTIME */
4285
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004286#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004287 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4288 // KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004289 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004290 kmp_balign_t *balign = new_thr->th.th_bar;
4291 for (b = 0; b < bs_last_barrier; ++b)
4292 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004293#endif
4294
Jonathan Peyton30419822017-05-12 18:01:32 +00004295 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4296 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004297
Jim Cownie5e8470a2013-09-27 10:38:44 +00004298 KMP_MB();
4299 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004300 }
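// --- Editor's sketch (illustration only, not part of kmp_runtime.cpp): the
// branch above reuses a worker by popping the head of the singly-linked
// __kmp_thread_pool free list; only when the pool is empty does the code below
// fork a brand-new thread. A minimal pool pop under that assumption:
//
//   #include <cstddef>
//   struct worker { int gtid; worker *next_pool; };
//   static worker *pool_head; // hypothetical stand-in for __kmp_thread_pool
//   static worker *pool_pop() {
//     worker *w = pool_head;
//     if (w != NULL)
//       pool_head = w->next_pool; // unlink the head; caller re-initializes it
//     return w;                   // NULL means a new worker must be created
//   }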
4301
4302 /* no, we'll fork a new one */
4303 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4304 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4305
4306#if KMP_USE_MONITOR
4307 // If this is the first worker thread the RTL is creating, then also
4308 // launch the monitor thread. We try to do this as early as possible.
4309 if (!TCR_4(__kmp_init_monitor)) {
4310 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4311 if (!TCR_4(__kmp_init_monitor)) {
4312 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4313 TCW_4(__kmp_init_monitor, 1);
4314 __kmp_create_monitor(&__kmp_monitor);
4315 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4316#if KMP_OS_WINDOWS
4317 // AC: wait until monitor has started. This is a fix for CQ232808.
4318 // The reason is that if the library is loaded/unloaded in a loop with
4319 // small (parallel) work in between, then there is a high probability that
4320 // the monitor thread starts after the library shutdown. At shutdown it is
4321 // too late to cope with the problem, because when the master is in
4322 // DllMain (process detach) the monitor has no chance to start (it is
4323 // blocked), and the master has no means to inform the monitor that the
4324 // library has gone, because all the memory which the monitor can access
4325 // is going to be released/reset.
4326 while (TCR_4(__kmp_init_monitor) < 2) {
4327 KMP_YIELD(TRUE);
4328 }
4329 KF_TRACE(10, ("after monitor thread has started\n"));
4330#endif
4331 }
4332 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4333 }
4334#endif
4335
4336 KMP_MB();
4337 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4338 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4339 }
4340
4341 /* allocate space for it. */
4342 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4343
4344 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4345
4346 if (__kmp_storage_map) {
4347 __kmp_print_thread_storage_map(new_thr, new_gtid);
4348 }
4349
4350 // add the reserve serialized team, initialized from the team's master thread
4351 {
4352 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4353 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4354 new_thr->th.th_serial_team = serial_team =
4355 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4356#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004357 ompt_data_none, // root parallel id
Jonathan Peyton30419822017-05-12 18:01:32 +00004358#endif
4359#if OMP_40_ENABLED
4360 proc_bind_default,
4361#endif
4362 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4363 }
4364 KMP_ASSERT(serial_team);
4365 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not for
4366 // execution (it is unused for now).
4367 serial_team->t.t_threads[0] = new_thr;
4368 KF_TRACE(10,
4369 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4370 new_thr));
4371
4372 /* setup the thread structures */
4373 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4374
4375#if USE_FAST_MEMORY
4376 __kmp_initialize_fast_memory(new_thr);
4377#endif /* USE_FAST_MEMORY */
4378
4379#if KMP_USE_BGET
4380 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4381 __kmp_initialize_bget(new_thr);
4382#endif
4383
4384 __kmp_init_random(new_thr); // Initialize random number generator
4385
4386 /* Initialize these only once when thread is grabbed for a team allocation */
4387 KA_TRACE(20,
4388 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4389 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4390
4391 int b;
4392 kmp_balign_t *balign = new_thr->th.th_bar;
4393 for (b = 0; b < bs_last_barrier; ++b) {
4394 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4395 balign[b].bb.team = NULL;
4396 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4397 balign[b].bb.use_oncore_barrier = 0;
4398 }
4399
4400 new_thr->th.th_spin_here = FALSE;
4401 new_thr->th.th_next_waiting = 0;
Jonathan Peytona764af62018-07-19 19:17:00 +00004402#if KMP_OS_UNIX
4403 new_thr->th.th_blocking = false;
4404#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004405
4406#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4407 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4408 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4409 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4410 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4411#endif
Jonathan Peyton92ca6182018-09-07 18:25:49 +00004412#if OMP_50_ENABLED
4413 new_thr->th.th_def_allocator = __kmp_def_allocator;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004414 new_thr->th.th_prev_level = 0;
4415 new_thr->th.th_prev_num_threads = 1;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00004416#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004417
4418 TCW_4(new_thr->th.th_in_pool, FALSE);
4419 new_thr->th.th_active_in_pool = FALSE;
4420 TCW_4(new_thr->th.th_active, TRUE);
4421
4422 /* adjust the global counters */
4423 __kmp_all_nth++;
4424 __kmp_nth++;
4425
4426 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4427 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4428 if (__kmp_adjust_gtid_mode) {
4429 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4430 if (TCR_4(__kmp_gtid_mode) != 2) {
4431 TCW_4(__kmp_gtid_mode, 2);
4432 }
4433 } else {
4434 if (TCR_4(__kmp_gtid_mode) != 1) {
4435 TCW_4(__kmp_gtid_mode, 1);
4436 }
4437 }
4438 }
4439
4440#ifdef KMP_ADJUST_BLOCKTIME
4441 /* Adjust blocktime back to zero if necessary */
4442 /* Middle initialization might not have occurred yet */
4443 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4444 if (__kmp_nth > __kmp_avail_proc) {
4445 __kmp_zero_bt = TRUE;
4446 }
4447 }
4448#endif /* KMP_ADJUST_BLOCKTIME */
4449
4450 /* actually fork it and create the new worker thread */
4451 KF_TRACE(
4452 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4453 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4454 KF_TRACE(10,
4455 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4456
4457 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4458 new_gtid));
4459 KMP_MB();
4460 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004461}
4462
Jonathan Peyton30419822017-05-12 18:01:32 +00004463/* Reinitialize team for reuse.
4464 The hot team code calls this routine at every fork barrier, so the EPCC barrier
4465 tests are extremely sensitive to changes in it, esp. writes to the team
4466 struct, which cause a cache invalidation in all threads.
4467 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4468static void __kmp_reinitialize_team(kmp_team_t *team,
4469 kmp_internal_control_t *new_icvs,
4470 ident_t *loc) {
4471 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4472 team->t.t_threads[0], team));
4473 KMP_DEBUG_ASSERT(team && new_icvs);
4474 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4475 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004476
Jonathan Peyton30419822017-05-12 18:01:32 +00004477 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004478 // Copy ICVs to the master thread's implicit taskdata
4479 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4480 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004481
Jonathan Peyton30419822017-05-12 18:01:32 +00004482 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4483 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004484}
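// The EPCC note above __kmp_reinitialize_team is the reason writes into the
// team struct go through KMP_CHECK_UPDATE rather than plain assignments. A
// minimal sketch of that conditional-write idiom (illustrative helper only,
// not the runtime's actual macro):
static inline void example_check_update(int &dst, int src) {
  if (dst != src) // skip the store (and the cache-line invalidation) if unchanged
    dst = src;
}
// Usage: example_check_update(t_argc, argc) would dirty the team's cache line
// only when the argument count actually changes between consecutive forks.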
4485
Jonathan Peyton30419822017-05-12 18:01:32 +00004486/* Initialize the team data structure.
4487 This assumes the t_threads and t_max_nproc are already set.
4488 Also, we don't touch the arguments */
4489static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4490 kmp_internal_control_t *new_icvs,
4491 ident_t *loc) {
4492 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004493
Jonathan Peyton30419822017-05-12 18:01:32 +00004494 /* verify */
4495 KMP_DEBUG_ASSERT(team);
4496 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4497 KMP_DEBUG_ASSERT(team->t.t_threads);
4498 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004499
Jonathan Peyton30419822017-05-12 18:01:32 +00004500 team->t.t_master_tid = 0; /* not needed */
4501 /* team->t.t_master_bar; not needed */
4502 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4503 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004504
Jonathan Peyton30419822017-05-12 18:01:32 +00004505 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4506 team->t.t_next_pool = NULL;
4507 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4508 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004509
Jonathan Peyton30419822017-05-12 18:01:32 +00004510 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4511 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004512
Jonathan Peyton30419822017-05-12 18:01:32 +00004513 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004514 team->t.t_sched.sched = new_icvs->sched.sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004515
4516#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004517 team->t.t_fp_control_saved = FALSE; /* not needed */
4518 team->t.t_x87_fpu_control_word = 0; /* not needed */
4519 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004520#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4521
Jonathan Peyton30419822017-05-12 18:01:32 +00004522 team->t.t_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004523
Jonathan Peyton30419822017-05-12 18:01:32 +00004524 team->t.t_ordered.dt.t_value = 0;
4525 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004526
Jonathan Peyton30419822017-05-12 18:01:32 +00004527 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004528
4529#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004530 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004531#endif
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004532#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00004533 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004534#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004535
Jonathan Peyton30419822017-05-12 18:01:32 +00004536 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004537
Jonathan Peyton30419822017-05-12 18:01:32 +00004538 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004539
Jonathan Peyton30419822017-05-12 18:01:32 +00004540 KMP_MB();
4541 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004542}
4543
Alp Toker98758b02014-03-02 04:12:06 +00004544#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004545/* Sets full mask for thread and returns old mask, no changes to structures. */
4546static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004547__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4548 if (KMP_AFFINITY_CAPABLE()) {
4549 int status;
4550 if (old_mask != NULL) {
4551 status = __kmp_get_system_affinity(old_mask, TRUE);
4552 int error = errno;
4553 if (status != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004554 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4555 __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004556 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004557 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004558 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4559 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004560}
4561#endif
4562
Alp Toker98758b02014-03-02 04:12:06 +00004563#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004564
Jim Cownie5e8470a2013-09-27 10:38:44 +00004565// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4566// It calculats the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004567// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004568// The master thread's partition should already include its current binding.
Jonathan Peyton30419822017-05-12 18:01:32 +00004569static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4570 // Copy the master thread's place partition to the team struct
4571 kmp_info_t *master_th = team->t.t_threads[0];
4572 KMP_DEBUG_ASSERT(master_th != NULL);
4573 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4574 int first_place = master_th->th.th_first_place;
4575 int last_place = master_th->th.th_last_place;
4576 int masters_place = master_th->th.th_current_place;
4577 team->t.t_first_place = first_place;
4578 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004579
Jonathan Peyton30419822017-05-12 18:01:32 +00004580 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4581 "bound to place %d partition = [%d,%d]\n",
4582 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4583 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004584
Jonathan Peyton30419822017-05-12 18:01:32 +00004585 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004586
Jonathan Peyton30419822017-05-12 18:01:32 +00004587 case proc_bind_default:
4588 // serial teams might have the proc_bind policy set to proc_bind_default. It
4589 // doesn't matter, as we don't rebind the master thread for any proc_bind policy
4590 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4591 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004592
Jonathan Peyton30419822017-05-12 18:01:32 +00004593 case proc_bind_master: {
4594 int f;
4595 int n_th = team->t.t_nproc;
4596 for (f = 1; f < n_th; f++) {
4597 kmp_info_t *th = team->t.t_threads[f];
4598 KMP_DEBUG_ASSERT(th != NULL);
4599 th->th.th_first_place = first_place;
4600 th->th.th_last_place = last_place;
4601 th->th.th_new_place = masters_place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004602#if OMP_50_ENABLED
4603 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4604 team->t.t_display_affinity != 1) {
4605 team->t.t_display_affinity = 1;
4606 }
4607#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004608
Jonathan Peyton30419822017-05-12 18:01:32 +00004609 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4610 "partition = [%d,%d]\n",
4611 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4612 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004613 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004614 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004615
Jonathan Peyton30419822017-05-12 18:01:32 +00004616 case proc_bind_close: {
4617 int f;
4618 int n_th = team->t.t_nproc;
4619 int n_places;
4620 if (first_place <= last_place) {
4621 n_places = last_place - first_place + 1;
4622 } else {
4623 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4624 }
4625 if (n_th <= n_places) {
4626 int place = masters_place;
4627 for (f = 1; f < n_th; f++) {
4628 kmp_info_t *th = team->t.t_threads[f];
4629 KMP_DEBUG_ASSERT(th != NULL);
4630
4631 if (place == last_place) {
4632 place = first_place;
4633 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4634 place = 0;
4635 } else {
4636 place++;
4637 }
4638 th->th.th_first_place = first_place;
4639 th->th.th_last_place = last_place;
4640 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004641#if OMP_50_ENABLED
4642 if (__kmp_display_affinity && place != th->th.th_current_place &&
4643 team->t.t_display_affinity != 1) {
4644 team->t.t_display_affinity = 1;
4645 }
4646#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004647
4648 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4649 "partition = [%d,%d]\n",
4650 __kmp_gtid_from_thread(team->t.t_threads[f]),
4651 team->t.t_id, f, place, first_place, last_place));
4652 }
4653 } else {
4654 int S, rem, gap, s_count;
4655 S = n_th / n_places;
4656 s_count = 0;
4657 rem = n_th - (S * n_places);
4658 gap = rem > 0 ? n_places / rem : n_places;
4659 int place = masters_place;
4660 int gap_ct = gap;
4661 for (f = 0; f < n_th; f++) {
4662 kmp_info_t *th = team->t.t_threads[f];
4663 KMP_DEBUG_ASSERT(th != NULL);
4664
4665 th->th.th_first_place = first_place;
4666 th->th.th_last_place = last_place;
4667 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004668#if OMP_50_ENABLED
4669 if (__kmp_display_affinity && place != th->th.th_current_place &&
4670 team->t.t_display_affinity != 1) {
4671 team->t.t_display_affinity = 1;
4672 }
4673#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004674 s_count++;
4675
4676 if ((s_count == S) && rem && (gap_ct == gap)) {
4677 // do nothing, add an extra thread to this place on the next iteration
4678 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4679 // we added an extra thread to this place; move to next place
4680 if (place == last_place) {
4681 place = first_place;
4682 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4683 place = 0;
4684 } else {
4685 place++;
4686 }
4687 s_count = 0;
4688 gap_ct = 1;
4689 rem--;
4690 } else if (s_count == S) { // place full; don't add extra
4691 if (place == last_place) {
4692 place = first_place;
4693 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4694 place = 0;
4695 } else {
4696 place++;
4697 }
4698 gap_ct++;
4699 s_count = 0;
4700 }
4701
4702 KA_TRACE(100,
4703 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4704 "partition = [%d,%d]\n",
4705 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4706 th->th.th_new_place, first_place, last_place));
4707 }
4708 KMP_DEBUG_ASSERT(place == masters_place);
4709 }
4710 } break;
4711
4712 case proc_bind_spread: {
4713 int f;
4714 int n_th = team->t.t_nproc;
4715 int n_places;
4716 int thidx;
4717 if (first_place <= last_place) {
4718 n_places = last_place - first_place + 1;
4719 } else {
4720 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4721 }
4722 if (n_th <= n_places) {
Paul Osmialowskia0162792017-08-10 23:04:11 +00004723 int place = -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004724
Paul Osmialowskia0162792017-08-10 23:04:11 +00004725 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4726 int S = n_places / n_th;
4727 int s_count, rem, gap, gap_ct;
4728
4729 place = masters_place;
4730 rem = n_places - n_th * S;
4731 gap = rem ? n_th / rem : 1;
4732 gap_ct = gap;
4733 thidx = n_th;
4734 if (update_master_only == 1)
4735 thidx = 1;
4736 for (f = 0; f < thidx; f++) {
4737 kmp_info_t *th = team->t.t_threads[f];
4738 KMP_DEBUG_ASSERT(th != NULL);
4739
4740 th->th.th_first_place = place;
4741 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004742#if OMP_50_ENABLED
4743 if (__kmp_display_affinity && place != th->th.th_current_place &&
4744 team->t.t_display_affinity != 1) {
4745 team->t.t_display_affinity = 1;
4746 }
4747#endif
Paul Osmialowskia0162792017-08-10 23:04:11 +00004748 s_count = 1;
4749 while (s_count < S) {
4750 if (place == last_place) {
4751 place = first_place;
4752 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4753 place = 0;
4754 } else {
4755 place++;
4756 }
4757 s_count++;
4758 }
4759 if (rem && (gap_ct == gap)) {
4760 if (place == last_place) {
4761 place = first_place;
4762 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4763 place = 0;
4764 } else {
4765 place++;
4766 }
4767 rem--;
4768 gap_ct = 0;
4769 }
4770 th->th.th_last_place = place;
4771 gap_ct++;
4772
Jonathan Peyton30419822017-05-12 18:01:32 +00004773 if (place == last_place) {
4774 place = first_place;
4775 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4776 place = 0;
4777 } else {
4778 place++;
4779 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004780
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004781 KA_TRACE(100,
4782 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4783 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4784 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4785 f, th->th.th_new_place, th->th.th_first_place,
4786 th->th.th_last_place, __kmp_affinity_num_masks));
Jonathan Peyton30419822017-05-12 18:01:32 +00004787 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004788 } else {
4789 /* If the space of available places is uniform, we can create T
4790 partitions of roughly P/T places each and put each thread into
4791 the first place of its partition. */
4792 double current = static_cast<double>(masters_place);
4793 double spacing =
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004794 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004795 int first, last;
4796 kmp_info_t *th;
4797
4798 thidx = n_th + 1;
4799 if (update_master_only == 1)
4800 thidx = 1;
4801 for (f = 0; f < thidx; f++) {
4802 first = static_cast<int>(current);
4803 last = static_cast<int>(current + spacing) - 1;
4804 KMP_DEBUG_ASSERT(last >= first);
4805 if (first >= n_places) {
4806 if (masters_place) {
4807 first -= n_places;
4808 last -= n_places;
4809 if (first == (masters_place + 1)) {
4810 KMP_DEBUG_ASSERT(f == n_th);
4811 first--;
4812 }
4813 if (last == masters_place) {
4814 KMP_DEBUG_ASSERT(f == (n_th - 1));
4815 last--;
4816 }
4817 } else {
4818 KMP_DEBUG_ASSERT(f == n_th);
4819 first = 0;
4820 last = 0;
4821 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004822 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004823 if (last >= n_places) {
4824 last = (n_places - 1);
4825 }
4826 place = first;
4827 current += spacing;
4828 if (f < n_th) {
4829 KMP_DEBUG_ASSERT(0 <= first);
4830 KMP_DEBUG_ASSERT(n_places > first);
4831 KMP_DEBUG_ASSERT(0 <= last);
4832 KMP_DEBUG_ASSERT(n_places > last);
4833 KMP_DEBUG_ASSERT(last_place >= first_place);
4834 th = team->t.t_threads[f];
4835 KMP_DEBUG_ASSERT(th);
4836 th->th.th_first_place = first;
4837 th->th.th_new_place = place;
4838 th->th.th_last_place = last;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004839#if OMP_50_ENABLED
4840 if (__kmp_display_affinity && place != th->th.th_current_place &&
4841 team->t.t_display_affinity != 1) {
4842 team->t.t_display_affinity = 1;
4843 }
4844#endif
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004845 KA_TRACE(100,
4846 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4847 "partition = [%d,%d], spacing = %.4f\n",
4848 __kmp_gtid_from_thread(team->t.t_threads[f]),
4849 team->t.t_id, f, th->th.th_new_place,
4850 th->th.th_first_place, th->th.th_last_place, spacing));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004851 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004852 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004853 }
4854 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4855 } else {
4856 int S, rem, gap, s_count;
4857 S = n_th / n_places;
4858 s_count = 0;
4859 rem = n_th - (S * n_places);
4860 gap = rem > 0 ? n_places / rem : n_places;
4861 int place = masters_place;
4862 int gap_ct = gap;
4863 thidx = n_th;
4864 if (update_master_only == 1)
4865 thidx = 1;
4866 for (f = 0; f < thidx; f++) {
4867 kmp_info_t *th = team->t.t_threads[f];
4868 KMP_DEBUG_ASSERT(th != NULL);
4869
4870 th->th.th_first_place = place;
4871 th->th.th_last_place = place;
4872 th->th.th_new_place = place;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00004873#if OMP_50_ENABLED
4874 if (__kmp_display_affinity && place != th->th.th_current_place &&
4875 team->t.t_display_affinity != 1) {
4876 team->t.t_display_affinity = 1;
4877 }
4878#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004879 s_count++;
4880
4881 if ((s_count == S) && rem && (gap_ct == gap)) {
4882 // do nothing, add an extra thread to this place on the next iteration
4883 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4884 // we added an extra thread to this place; move on to next place
4885 if (place == last_place) {
4886 place = first_place;
4887 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4888 place = 0;
4889 } else {
4890 place++;
4891 }
4892 s_count = 0;
4893 gap_ct = 1;
4894 rem--;
4895 } else if (s_count == S) { // place is full; don't add extra thread
4896 if (place == last_place) {
4897 place = first_place;
4898 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4899 place = 0;
4900 } else {
4901 place++;
4902 }
4903 gap_ct++;
4904 s_count = 0;
4905 }
4906
4907 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4908 "partition = [%d,%d]\n",
4909 __kmp_gtid_from_thread(team->t.t_threads[f]),
4910 team->t.t_id, f, th->th.th_new_place,
4911 th->th.th_first_place, th->th.th_last_place));
4912 }
4913 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4914 }
4915 } break;
4916
4917 default:
4918 break;
4919 }
4920
4921 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004922}
4923
Alp Toker98758b02014-03-02 04:12:06 +00004924#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
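// A stand-alone model of the over-subscribed (threads > places) distribution
// used by proc_bind_close and proc_bind_spread above. This is a sketch only:
// plain ints stand in for kmp_info_t, and the place window is assumed to be
// [0, n_places) so the wrap-around reduces to a modulo.
static void example_close_places(int n_th, int n_places, int masters_place,
                                 int *out /* n_th entries */) {
  int S = n_th / n_places;            // base number of threads per place
  int rem = n_th - S * n_places;      // places that receive one extra thread
  int gap = rem > 0 ? n_places / rem : n_places;
  int place = masters_place, gap_ct = gap, s_count = 0;
  for (int f = 0; f < n_th; ++f) {
    out[f] = place;
    ++s_count;
    if (s_count == S && rem && gap_ct == gap) {
      // stay on this place one more iteration: it takes an extra thread
    } else if (s_count == S + 1 && rem && gap_ct == gap) {
      place = (place + 1) % n_places; // extra thread placed; move on
      s_count = 0;
      gap_ct = 1;
      --rem;
    } else if (s_count == S) {        // place is full; no extra thread here
      place = (place + 1) % n_places;
      ++gap_ct;
      s_count = 0;
    }
  }
  // e.g. example_close_places(6, 4, 0, out) fills out with {0, 0, 1, 2, 2, 3}:
  // the two leftover threads land a gap of two places apart.
}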
Jim Cownie5e8470a2013-09-27 10:38:44 +00004925
Jonathan Peyton30419822017-05-12 18:01:32 +00004926/* allocate a new team data structure to use. take one off of the free pool if
4927 available */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004928kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004929__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004930#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004931 ompt_data_t ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004932#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004933#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004934 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004935#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004936 kmp_internal_control_t *new_icvs,
4937 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4938 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4939 int f;
4940 kmp_team_t *team;
4941 int use_hot_team = !root->r.r_active;
4942 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004943
Jonathan Peyton30419822017-05-12 18:01:32 +00004944 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4945 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4946 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4947 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004948
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004949#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004950 kmp_hot_team_ptr_t *hot_teams;
4951 if (master) {
4952 team = master->th.th_team;
4953 level = team->t.t_active_level;
4954 if (master->th.th_teams_microtask) { // in teams construct?
4955 if (master->th.th_teams_size.nteams > 1 &&
4956 ( // #teams > 1
4957 team->t.t_pkfn ==
4958 (microtask_t)__kmp_teams_master || // inner fork of the teams
4959 master->th.th_teams_level <
4960 team->t.t_level)) { // or nested parallel inside the teams
4961 ++level; // do not increment if #teams==1, or for the outer fork of the teams;
4962 // increment otherwise
4963 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004964 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004965 hot_teams = master->th.th_hot_teams;
4966 if (level < __kmp_hot_teams_max_level && hot_teams &&
4967 hot_teams[level]
4968 .hot_team) { // hot team has already been allocated for given level
4969 use_hot_team = 1;
4970 } else {
4971 use_hot_team = 0;
4972 }
4973 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004974#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004975 // Optimization to use a "hot" team
4976 if (use_hot_team && new_nproc > 1) {
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00004977 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004978#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004979 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004980#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004981 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004982#endif
4983#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004984 if (__kmp_tasking_mode != tskm_immediate_exec) {
4985 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4986 "task_team[1] = %p before reinit\n",
4987 team->t.t_task_team[0], team->t.t_task_team[1]));
4988 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004989#endif
4990
Jonathan Peyton30419822017-05-12 18:01:32 +00004991 // Has the number of threads changed?
4992 /* Let's assume the most common case is that the number of threads is
4993 unchanged, and put that case first. */
4994 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4995 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4996 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004997 // team size was already reduced, so we check the special flag
Jonathan Peyton30419822017-05-12 18:01:32 +00004998 if (team->t.t_size_changed == -1) {
4999 team->t.t_size_changed = 1;
5000 } else {
5001 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5002 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005003
Jonathan Peyton30419822017-05-12 18:01:32 +00005004 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
5005 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00005006 // set master's schedule as new run-time schedule
5007 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005008
Jonathan Peyton30419822017-05-12 18:01:32 +00005009 __kmp_reinitialize_team(team, new_icvs,
5010 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005011
Jonathan Peyton30419822017-05-12 18:01:32 +00005012 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5013 team->t.t_threads[0], team));
5014 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005015
5016#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005017#if KMP_AFFINITY_SUPPORTED
5018 if ((team->t.t_size_changed == 0) &&
5019 (team->t.t_proc_bind == new_proc_bind)) {
5020 if (new_proc_bind == proc_bind_spread) {
5021 __kmp_partition_places(
5022 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005023 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005024 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5025 "proc_bind = %d, partition = [%d,%d]\n",
5026 team->t.t_id, new_proc_bind, team->t.t_first_place,
5027 team->t.t_last_place));
5028 } else {
5029 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5030 __kmp_partition_places(team);
5031 }
5032#else
5033 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5034#endif /* KMP_AFFINITY_SUPPORTED */
5035#endif /* OMP_40_ENABLED */
5036 } else if (team->t.t_nproc > new_nproc) {
5037 KA_TRACE(20,
5038 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
5039 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005040
Jonathan Peyton30419822017-05-12 18:01:32 +00005041 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005042#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005043 if (__kmp_hot_teams_mode == 0) {
5044 // AC: saved number of threads should correspond to team's value in this
5045 // mode, can be bigger in mode 1, when hot team has threads in reserve
5046 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5047 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005048#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005049 /* release the extra threads we don't need any more */
5050 for (f = new_nproc; f < team->t.t_nproc; f++) {
5051 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5052 if (__kmp_tasking_mode != tskm_immediate_exec) {
5053 // When decreasing team size, threads no longer in the team should
5054 // unref task team.
5055 team->t.t_threads[f]->th.th_task_team = NULL;
5056 }
5057 __kmp_free_thread(team->t.t_threads[f]);
5058 team->t.t_threads[f] = NULL;
5059 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005060#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005061 } // (__kmp_hot_teams_mode == 0)
5062 else {
5063 // When keeping extra threads in team, switch threads to wait on own
5064 // b_go flag
5065 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5066 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5067 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5068 for (int b = 0; b < bs_last_barrier; ++b) {
5069 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5070 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00005071 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005072 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5073 }
5074 }
5075 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005076#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005077 team->t.t_nproc = new_nproc;
5078 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00005079 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
Jonathan Peyton30419822017-05-12 18:01:32 +00005080 __kmp_reinitialize_team(team, new_icvs,
5081 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005082
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00005083 // Update remaining threads
Jonathan Peyton30419822017-05-12 18:01:32 +00005084 for (f = 0; f < new_nproc; ++f) {
5085 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5086 }
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00005087
Jonathan Peyton30419822017-05-12 18:01:32 +00005088 // restore the current task state of the master thread: should be the
5089 // implicit task
5090 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5091 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005092
Jonathan Peyton30419822017-05-12 18:01:32 +00005093 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005094
5095#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00005096 for (f = 0; f < team->t.t_nproc; f++) {
5097 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5098 team->t.t_threads[f]->th.th_team_nproc ==
5099 team->t.t_nproc);
5100 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005101#endif
5102
5103#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005104 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5105#if KMP_AFFINITY_SUPPORTED
5106 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005107#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005108#endif
5109 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00005110#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005111 kmp_affin_mask_t *old_mask;
5112 if (KMP_AFFINITY_CAPABLE()) {
5113 KMP_CPU_ALLOC(old_mask);
5114 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005115#endif
5116
Jonathan Peyton30419822017-05-12 18:01:32 +00005117 KA_TRACE(20,
5118 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5119 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005120
Jonathan Peyton30419822017-05-12 18:01:32 +00005121 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005122
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005123#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005124 int avail_threads = hot_teams[level].hot_team_nth;
5125 if (new_nproc < avail_threads)
5126 avail_threads = new_nproc;
5127 kmp_info_t **other_threads = team->t.t_threads;
5128 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5129 // Adjust barrier data of reserved threads (if any) of the team
5130 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005131 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00005132 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5133 for (b = 0; b < bs_last_barrier; ++b) {
5134 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5135 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005136#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00005137 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005138#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005139 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005140 }
5141 if (hot_teams[level].hot_team_nth >= new_nproc) {
5142 // we have all needed threads in reserve, no need to allocate any
5143 // this is only possible in mode 1; we cannot have reserved threads in mode 0
5144 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5145 team->t.t_nproc = new_nproc; // just get reserved threads involved
5146 } else {
5147 // we may have some threads in reserve, but not enough
5148 team->t.t_nproc =
5149 hot_teams[level]
5150 .hot_team_nth; // get reserved threads involved if any
5151 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5152#endif // KMP_NESTED_HOT_TEAMS
5153 if (team->t.t_max_nproc < new_nproc) {
5154 /* reallocate larger arrays */
5155 __kmp_reallocate_team_arrays(team, new_nproc);
5156 __kmp_reinitialize_team(team, new_icvs, NULL);
5157 }
5158
5159#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5160 /* Temporarily set full mask for master thread before creation of
5161 workers. The reason is that workers inherit the affinity from the master,
5162 so if a lot of workers are created on a single core quickly, they
5163 don't get a chance to set their own affinity for a long time. */
5164 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5165#endif
5166
5167 /* allocate new threads for the hot team */
5168 for (f = team->t.t_nproc; f < new_nproc; f++) {
5169 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5170 KMP_DEBUG_ASSERT(new_worker);
5171 team->t.t_threads[f] = new_worker;
5172
5173 KA_TRACE(20,
5174 ("__kmp_allocate_team: team %d init T#%d arrived: "
5175 "join=%llu, plain=%llu\n",
5176 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5177 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5178 team->t.t_bar[bs_plain_barrier].b_arrived));
5179
5180 { // Initialize barrier data for new threads.
5181 int b;
5182 kmp_balign_t *balign = new_worker->th.th_bar;
5183 for (b = 0; b < bs_last_barrier; ++b) {
5184 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5185 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5186 KMP_BARRIER_PARENT_FLAG);
5187#if USE_DEBUGGER
5188 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5189#endif
5190 }
5191 }
5192 }
5193
5194#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5195 if (KMP_AFFINITY_CAPABLE()) {
5196 /* Restore initial master thread's affinity mask */
5197 __kmp_set_system_affinity(old_mask, TRUE);
5198 KMP_CPU_FREE(old_mask);
5199 }
5200#endif
5201#if KMP_NESTED_HOT_TEAMS
5202 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5203#endif // KMP_NESTED_HOT_TEAMS
5204 /* make sure everyone is synchronized */
5205 int old_nproc = team->t.t_nproc; // save old value and use to update only
5206 // new threads below
5207 __kmp_initialize_team(team, new_nproc, new_icvs,
5208 root->r.r_uber_thread->th.th_ident);
5209
5210 /* reinitialize the threads */
5211 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5212 for (f = 0; f < team->t.t_nproc; ++f)
5213 __kmp_initialize_info(team->t.t_threads[f], team, f,
5214 __kmp_gtid_from_tid(f, team));
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00005215
Jonathan Peyton30419822017-05-12 18:01:32 +00005216 if (level) { // set th_task_state for new threads in nested hot team
5217 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5218 // only need to set the th_task_state for the new threads. th_task_state
5219 // for master thread will not be accurate until after this in
5220 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5221 // correct value.
5222 for (f = old_nproc; f < team->t.t_nproc; ++f)
5223 team->t.t_threads[f]->th.th_task_state =
5224 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5225 } else { // set th_task_state for new threads in non-nested hot team
5226 int old_state =
5227 team->t.t_threads[0]->th.th_task_state; // copy master's state
5228 for (f = old_nproc; f < team->t.t_nproc; ++f)
5229 team->t.t_threads[f]->th.th_task_state = old_state;
5230 }
5231
5232#ifdef KMP_DEBUG
5233 for (f = 0; f < team->t.t_nproc; ++f) {
5234 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5235 team->t.t_threads[f]->th.th_team_nproc ==
5236 team->t.t_nproc);
5237 }
5238#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005239
5240#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005241 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5242#if KMP_AFFINITY_SUPPORTED
5243 __kmp_partition_places(team);
5244#endif
5245#endif
5246 } // Check changes in number of threads
5247
5248#if OMP_40_ENABLED
5249 kmp_info_t *master = team->t.t_threads[0];
5250 if (master->th.th_teams_microtask) {
5251 for (f = 1; f < new_nproc; ++f) {
5252 // propagate teams construct specific info to workers
5253 kmp_info_t *thr = team->t.t_threads[f];
5254 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5255 thr->th.th_teams_level = master->th.th_teams_level;
5256 thr->th.th_teams_size = master->th.th_teams_size;
5257 }
5258 }
5259#endif /* OMP_40_ENABLED */
5260#if KMP_NESTED_HOT_TEAMS
5261 if (level) {
5262 // Sync barrier state for nested hot teams, not needed for outermost hot
5263 // team.
5264 for (f = 1; f < new_nproc; ++f) {
5265 kmp_info_t *thr = team->t.t_threads[f];
5266 int b;
5267 kmp_balign_t *balign = thr->th.th_bar;
5268 for (b = 0; b < bs_last_barrier; ++b) {
5269 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5270 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5271#if USE_DEBUGGER
5272 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5273#endif
5274 }
5275 }
5276 }
5277#endif // KMP_NESTED_HOT_TEAMS
5278
5279 /* reallocate space for arguments if necessary */
5280 __kmp_alloc_argv_entries(argc, team, TRUE);
5281 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5282 // The hot team re-uses the previous task team,
5283 // if untouched during the previous release->gather phase.
5284
5285 KF_TRACE(10, (" hot_team = %p\n", team));
5286
5287#if KMP_DEBUG
5288 if (__kmp_tasking_mode != tskm_immediate_exec) {
5289 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5290 "task_team[1] = %p after reinit\n",
5291 team->t.t_task_team[0], team->t.t_task_team[1]));
5292 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005293#endif
5294
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005295#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005296 __ompt_team_assign_id(team, ompt_parallel_data);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005297#endif
5298
Jim Cownie5e8470a2013-09-27 10:38:44 +00005299 KMP_MB();
5300
Jim Cownie5e8470a2013-09-27 10:38:44 +00005301 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005302 }
5303
5304 /* next, let's try to take one from the team pool */
5305 KMP_MB();
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005306 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005307 /* TODO: consider resizing undersized teams instead of reaping them, now
5308 that we have a resizing mechanism */
5309 if (team->t.t_max_nproc >= max_nproc) {
5310 /* take this team from the team pool */
5311 __kmp_team_pool = team->t.t_next_pool;
5312
5313 /* setup the team for fresh use */
5314 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5315
5316 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5317 "task_team[1] %p to NULL\n",
5318 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5319 team->t.t_task_team[0] = NULL;
5320 team->t.t_task_team[1] = NULL;
5321
5322 /* reallocate space for arguments if necessary */
5323 __kmp_alloc_argv_entries(argc, team, TRUE);
5324 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5325
5326 KA_TRACE(
5327 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5328 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5329 { // Initialize barrier data.
5330 int b;
5331 for (b = 0; b < bs_last_barrier; ++b) {
5332 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5333#if USE_DEBUGGER
5334 team->t.t_bar[b].b_master_arrived = 0;
5335 team->t.t_bar[b].b_team_arrived = 0;
5336#endif
5337 }
5338 }
5339
5340#if OMP_40_ENABLED
5341 team->t.t_proc_bind = new_proc_bind;
5342#endif
5343
5344 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5345 team->t.t_id));
5346
5347#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005348 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005349#endif
5350
5351 KMP_MB();
5352
5353 return team;
5354 }
5355
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005356 /* reap team if it is too small, then loop back and check the next one */
5357 // not sure if this is wise, but it will be redone during the hot-teams
5358 // rewrite.
5359 /* TODO: Use technique to find the right size hot-team, don't reap them */
Jonathan Peyton30419822017-05-12 18:01:32 +00005360 team = __kmp_reap_team(team);
5361 __kmp_team_pool = team;
5362 }
5363
5364 /* nothing available in the pool, no matter, make a new team! */
5365 KMP_MB();
5366 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5367
5368 /* and set it up */
5369 team->t.t_max_nproc = max_nproc;
5370 /* NOTE well, for some reason allocating one big buffer and dividing it up
5371 seems to really hurt performance a lot on the P4, so let's not use this */
5372 __kmp_allocate_team_arrays(team, max_nproc);
5373
5374 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5375 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5376
5377 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5378 "%p to NULL\n",
5379 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5380 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5381 // memory, no need to duplicate
5382 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5383 // memory, no need to duplicate
5384
5385 if (__kmp_storage_map) {
5386 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5387 }
5388
5389 /* allocate space for arguments */
5390 __kmp_alloc_argv_entries(argc, team, FALSE);
5391 team->t.t_argc = argc;
5392
5393 KA_TRACE(20,
5394 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5395 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5396 { // Initialize barrier data.
5397 int b;
5398 for (b = 0; b < bs_last_barrier; ++b) {
5399 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5400#if USE_DEBUGGER
5401 team->t.t_bar[b].b_master_arrived = 0;
5402 team->t.t_bar[b].b_team_arrived = 0;
5403#endif
5404 }
5405 }
5406
5407#if OMP_40_ENABLED
5408 team->t.t_proc_bind = new_proc_bind;
5409#endif
5410
5411#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005412 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005413 team->t.ompt_serialized_team_info = NULL;
5414#endif
5415
5416 KMP_MB();
5417
5418 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5419 team->t.t_id));
5420
5421 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005422}
5423
5424/* TODO implement hot-teams at all levels */
5425/* TODO implement lazy thread release on demand (disband request) */
5426
5427/* free the team. return it to the team pool. release all the threads
5428 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005429void __kmp_free_team(kmp_root_t *root,
5430 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5431 int f;
5432 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5433 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005434
Jonathan Peyton30419822017-05-12 18:01:32 +00005435 /* verify state */
5436 KMP_DEBUG_ASSERT(root);
5437 KMP_DEBUG_ASSERT(team);
5438 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5439 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005440
Jonathan Peyton30419822017-05-12 18:01:32 +00005441 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005442#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005443 int level;
5444 kmp_hot_team_ptr_t *hot_teams;
5445 if (master) {
5446 level = team->t.t_active_level - 1;
5447 if (master->th.th_teams_microtask) { // in teams construct?
5448 if (master->th.th_teams_size.nteams > 1) {
5449 ++level; // level was not increased in teams construct for
5450 // team_of_masters
5451 }
5452 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5453 master->th.th_teams_level == team->t.t_level) {
5454 ++level; // level was not increased in teams construct for
5455 // team_of_workers before the parallel
5456 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005457 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005458 hot_teams = master->th.th_hot_teams;
5459 if (level < __kmp_hot_teams_max_level) {
5460 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5461 use_hot_team = 1;
5462 }
5463 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005464#endif // KMP_NESTED_HOT_TEAMS
5465
Jonathan Peyton30419822017-05-12 18:01:32 +00005466 /* team is done working */
5467 TCW_SYNC_PTR(team->t.t_pkfn,
5468 NULL); // Important for Debugging Support Library.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005469#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005470 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005471#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005472 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005473
Jonathan Peyton30419822017-05-12 18:01:32 +00005474 /* if we are non-hot team, release our threads */
5475 if (!use_hot_team) {
5476 if (__kmp_tasking_mode != tskm_immediate_exec) {
5477 // Wait for threads to reach reapable state
5478 for (f = 1; f < team->t.t_nproc; ++f) {
5479 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5480 kmp_info_t *th = team->t.t_threads[f];
5481 volatile kmp_uint32 *state = &th->th.th_reap_state;
5482 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005483#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005484 // On Windows a thread can be killed at any time, check this
5485 DWORD ecode;
5486 if (!__kmp_is_thread_alive(th, &ecode)) {
5487 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5488 break;
5489 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005490#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005491 // first check if thread is sleeping
5492 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5493 if (fl.is_sleeping())
5494 fl.resume(__kmp_gtid_from_thread(th));
5495 KMP_CPU_PAUSE();
5496 }
5497 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005498
Jonathan Peyton30419822017-05-12 18:01:32 +00005499 // Delete task teams
5500 int tt_idx;
5501 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5502 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5503 if (task_team != NULL) {
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00005504 for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
5505 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
Jonathan Peyton30419822017-05-12 18:01:32 +00005506 team->t.t_threads[f]->th.th_task_team = NULL;
5507 }
5508 KA_TRACE(
5509 20,
5510 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5511 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005512#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005513 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005514#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005515 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005516 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005517 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005518 }
5519
Jonathan Peyton30419822017-05-12 18:01:32 +00005520 // Reset pointer to parent team only for non-hot teams.
5521 team->t.t_parent = NULL;
5522 team->t.t_level = 0;
5523 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005524
Jonathan Peyton30419822017-05-12 18:01:32 +00005525 /* free the worker threads */
5526 for (f = 1; f < team->t.t_nproc; ++f) {
5527 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5528 __kmp_free_thread(team->t.t_threads[f]);
5529 team->t.t_threads[f] = NULL;
5530 }
5531
5532 /* put the team back in the team pool */
5533 /* TODO limit size of team pool, call reap_team if pool too large */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005534 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005535 __kmp_team_pool = (volatile kmp_team_t *)team;
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00005536 } else { // Check if team was created for the masters in a teams construct
5537 // See if first worker is a CG root
5538 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5539 team->t.t_threads[1]->th.th_cg_roots);
5540 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5541 // Clean up the CG root nodes on workers so that this team can be re-used
5542 for (f = 1; f < team->t.t_nproc; ++f) {
5543 kmp_info_t *thr = team->t.t_threads[f];
5544 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5545 thr->th.th_cg_roots->cg_root == thr);
5546 // Pop current CG root off list
5547 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5548 thr->th.th_cg_roots = tmp->up;
5549 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5550 " up to node %p. cg_nthreads was %d\n",
5551 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5552 __kmp_free(tmp);
5553 // Restore current task's thread_limit from CG root
5554 if (thr->th.th_cg_roots)
5555 thr->th.th_current_task->td_icvs.thread_limit =
5556 thr->th.th_cg_roots->cg_thread_limit;
5557 }
5558 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005559 }
5560
5561 KMP_MB();
5562}
Jim Cownie5e8470a2013-09-27 10:38:44 +00005563
5564/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005565kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5566 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005567
Jonathan Peyton30419822017-05-12 18:01:32 +00005568 KMP_DEBUG_ASSERT(team);
5569 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5570 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5571 KMP_DEBUG_ASSERT(team->t.t_threads);
5572 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005573
Jonathan Peyton30419822017-05-12 18:01:32 +00005574 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005575
Jonathan Peyton30419822017-05-12 18:01:32 +00005576 /* free stuff */
5577 __kmp_free_team_arrays(team);
5578 if (team->t.t_argv != &team->t.t_inline_argv[0])
5579 __kmp_free((void *)team->t.t_argv);
5580 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005581
Jonathan Peyton30419822017-05-12 18:01:32 +00005582 KMP_MB();
5583 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005584}
5585
Jim Cownie5e8470a2013-09-27 10:38:44 +00005586// Free the thread. Don't reap it, just place it on the pool of available
5587// threads.
5588//
5589// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5590// binding for the affinity mechanism to be useful.
5591//
5592// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5593// However, we want to avoid a potential performance problem by always
5594// scanning through the list to find the correct point at which to insert
5595// the thread (potential N**2 behavior). To do this we keep track of the
5596// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5597// With single-level parallelism, threads will always be added to the tail
5598// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5599// parallelism, all bets are off and we may need to scan through the entire
5600// free list.
5601//
5602// This change also has a potentially large performance benefit, for some
5603// applications. Previously, as threads were freed from the hot team, they
5604// would be placed back on the free list in inverse order. If the hot team
5605// grew back to its original size, then the freed threads would be placed
5606// back on the hot team in reverse order. This could cause bad cache
5607// locality problems on programs where the size of the hot team regularly
5608// grew and shrunk.
5609//
5610// Now, for single-level parallelism, the OMP tid is always == gtid.
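// A minimal stand-alone sketch of the hint-assisted sorted insert described
// above. The node type and names here are illustrative; the real pool links
// kmp_info_t structures through th.th_next_pool and keys on the gtid. (The
// actual insertion is done by __kmp_free_thread below.)
struct example_pool_node {
  int gtid;
  example_pool_node *next;
};
static example_pool_node *example_pool = nullptr;           // __kmp_thread_pool analogue
static example_pool_node *example_pool_insert_pt = nullptr; // insertion hint

static void example_pool_insert(example_pool_node *n) {
  // If the hint already points past the new element, rescan from the head.
  if (example_pool_insert_pt && example_pool_insert_pt->gtid > n->gtid)
    example_pool_insert_pt = nullptr;
  example_pool_node **scan =
      example_pool_insert_pt ? &example_pool_insert_pt->next : &example_pool;
  while (*scan && (*scan)->gtid < n->gtid) // 0 iterations for single-level parallelism
    scan = &(*scan)->next;
  n->next = *scan; // splice in, keeping the list sorted by ascending gtid
  *scan = n;
  example_pool_insert_pt = n; // remember the insertion point for the next free
}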
Jonathan Peyton30419822017-05-12 18:01:32 +00005611void __kmp_free_thread(kmp_info_t *this_th) {
5612 int gtid;
5613 kmp_info_t **scan;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005614
Jonathan Peyton30419822017-05-12 18:01:32 +00005615 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5616 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005617
Jonathan Peyton30419822017-05-12 18:01:32 +00005618 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005619
Jonathan Peyton30419822017-05-12 18:01:32 +00005620 // When moving thread to pool, switch thread to wait on own b_go flag, and
5621 // uninitialized (NULL team).
5622 int b;
5623 kmp_balign_t *balign = this_th->th.th_bar;
5624 for (b = 0; b < bs_last_barrier; ++b) {
5625 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5626 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5627 balign[b].bb.team = NULL;
5628 balign[b].bb.leaf_kids = 0;
5629 }
5630 this_th->th.th_task_state = 0;
Andrey Churbanov3336aa02018-03-19 18:05:15 +00005631 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
Jonathan Peyton30419822017-05-12 18:01:32 +00005632
5633 /* put thread back on the free pool */
5634 TCW_PTR(this_th->th.th_team, NULL);
5635 TCW_PTR(this_th->th.th_root, NULL);
5636 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5637
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00005638 while (this_th->th.th_cg_roots) {
5639 this_th->th.th_cg_roots->cg_nthreads--;
5640 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5641 " %p of thread %p to %d\n",
5642 this_th, this_th->th.th_cg_roots,
5643 this_th->th.th_cg_roots->cg_root,
5644 this_th->th.th_cg_roots->cg_nthreads));
5645 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5646 if (tmp->cg_root == this_th) { // Thread is a cg_root
5647 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5648 KA_TRACE(
5649 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5650 this_th->th.th_cg_roots = tmp->up;
5651 __kmp_free(tmp);
5652 } else { // Worker thread
5653 this_th->th.th_cg_roots = NULL;
5654 break;
5655 }
5656 }
5657
Jonathan Peytonbff8ded2018-01-10 18:24:09 +00005658 /* If the implicit task assigned to this thread can be used by other threads,
5659 * multiple threads can share the task data and try to free the task at
5660 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
5661 * with higher probability when the hot team is disabled, but can occur even
5662 * when the hot team is enabled */
5663 __kmp_free_implicit_task(this_th);
5664 this_th->th.th_current_task = NULL;
5665
Jonathan Peyton30419822017-05-12 18:01:32 +00005666 // If the __kmp_thread_pool_insert_pt is already past the new insert
5667 // point, then we need to re-scan the entire list.
5668 gtid = this_th->th.th_info.ds.ds_gtid;
5669 if (__kmp_thread_pool_insert_pt != NULL) {
5670 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5671 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5672 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005673 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005674 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005675
Jonathan Peyton30419822017-05-12 18:01:32 +00005676 // Scan down the list to find the place to insert the thread.
5677 // scan is the address of a link in the list, possibly the address of
5678 // __kmp_thread_pool itself.
5679 //
5680 // In the absence of nested parallelism, the for loop will have 0 iterations.
5681 if (__kmp_thread_pool_insert_pt != NULL) {
5682 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5683 } else {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005684 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005685 }
5686 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5687 scan = &((*scan)->th.th_next_pool))
5688 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005689
Jonathan Peyton30419822017-05-12 18:01:32 +00005690 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5691 // to its address.
5692 TCW_PTR(this_th->th.th_next_pool, *scan);
5693 __kmp_thread_pool_insert_pt = *scan = this_th;
5694 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5695 (this_th->th.th_info.ds.ds_gtid <
5696 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5697 TCW_4(this_th->th.th_in_pool, TRUE);
5698 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005699
Jonathan Peyton30419822017-05-12 18:01:32 +00005700 TCW_4(__kmp_nth, __kmp_nth - 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005701
5702#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005703 /* Adjust blocktime back to user setting or default if necessary */
5704 /* Middle initialization might never have occurred */
5705 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5706 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5707 if (__kmp_nth <= __kmp_avail_proc) {
5708 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005709 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005710 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005711#endif /* KMP_ADJUST_BLOCKTIME */
5712
Jonathan Peyton30419822017-05-12 18:01:32 +00005713 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005714}
5715
Jim Cownie5e8470a2013-09-27 10:38:44 +00005716/* ------------------------------------------------------------------------ */
5717
Jonathan Peyton30419822017-05-12 18:01:32 +00005718void *__kmp_launch_thread(kmp_info_t *this_thr) {
5719 int gtid = this_thr->th.th_info.ds.ds_gtid;
5720 /* void *stack_data;*/
5721 kmp_team_t *(*volatile pteam);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005722
Jonathan Peyton30419822017-05-12 18:01:32 +00005723 KMP_MB();
5724 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005725
Jonathan Peyton30419822017-05-12 18:01:32 +00005726 if (__kmp_env_consistency_check) {
5727 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5728 }
5729
5730#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005731 ompt_data_t *thread_data;
5732 if (ompt_enabled.enabled) {
5733 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
Jonathan Peyton3574f282018-10-04 14:57:04 +00005734 *thread_data = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00005735
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005736 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005737 this_thr->th.ompt_thread_info.wait_id = 0;
Joachim Protze82e94a52017-11-01 10:08:30 +00005738 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5739 if (ompt_enabled.ompt_callback_thread_begin) {
5740 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5741 ompt_thread_worker, thread_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005742 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005743 }
5744#endif
5745
Joachim Protze82e94a52017-11-01 10:08:30 +00005746#if OMPT_SUPPORT
5747 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005748 this_thr->th.ompt_thread_info.state = ompt_state_idle;
Joachim Protze82e94a52017-11-01 10:08:30 +00005749 }
5750#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005751 /* This is the place where threads wait for work */
5752 while (!TCR_4(__kmp_global.g.g_done)) {
5753 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5754 KMP_MB();
5755
5756 /* wait for work to do */
5757 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005758
Jonathan Peyton30419822017-05-12 18:01:32 +00005759 /* No tid yet since not part of a team */
5760 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5761
5762#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005763 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005764 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005765 }
5766#endif
5767
5768 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5769
5770 /* have we been allocated? */
5771 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005772 /* we were just woken up, so run our new task */
5773 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5774 int rc;
5775 KA_TRACE(20,
5776 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5777 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5778 (*pteam)->t.t_pkfn));
5779
5780 updateHWFPControl(*pteam);
5781
5782#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005783 if (ompt_enabled.enabled) {
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005784 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00005785 }
5786#endif
5787
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00005788 rc = (*pteam)->t.t_invoke(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00005789 KMP_ASSERT(rc);
5790
Jim Cownie5e8470a2013-09-27 10:38:44 +00005791 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005792 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5793 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5794 (*pteam)->t.t_pkfn));
5795 }
Joachim Protze82e94a52017-11-01 10:08:30 +00005796#if OMPT_SUPPORT
5797 if (ompt_enabled.enabled) {
5798 /* no frame set while outside task */
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005799 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
Joachim Protze82e94a52017-11-01 10:08:30 +00005800
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00005801 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005802 }
5803#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00005804 /* join barrier after parallel region */
5805 __kmp_join_barrier(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005806 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005807 }
5808 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005809
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005810#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005811 if (ompt_enabled.ompt_callback_thread_end) {
5812 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005813 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005814#endif
5815
Jonathan Peyton30419822017-05-12 18:01:32 +00005816 this_thr->th.th_task_team = NULL;
5817 /* run the destructors for the threadprivate data for this thread */
5818 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005819
Jonathan Peyton30419822017-05-12 18:01:32 +00005820 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5821 KMP_MB();
5822 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005823}
5824
5825/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005826
Jonathan Peyton30419822017-05-12 18:01:32 +00005827void __kmp_internal_end_dest(void *specific_gtid) {
5828#if KMP_COMPILER_ICC
5829#pragma warning(push)
5830#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5831// significant bits
5832#endif
5833 // Make sure no significant bits are lost
5834 int gtid = (kmp_intptr_t)specific_gtid - 1;
5835#if KMP_COMPILER_ICC
5836#pragma warning(pop)
5837#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005838
Jonathan Peyton30419822017-05-12 18:01:32 +00005839 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5840 /* NOTE: the gtid is stored as gtid+1 in the thread-local storage;
5841 * this is because 0 is reserved for the nothing-stored case */
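/* A small illustrative example of this convention (values are hypothetical):
   for gtid 5 the value 6 is kept in thread-specific data, so this destructor
   receives 6 and recovers the gtid by subtracting 1, while a stored value of
   0 simply means that no gtid was ever recorded for the thread. */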
Jim Cownie5e8470a2013-09-27 10:38:44 +00005842
Jonathan Peyton30419822017-05-12 18:01:32 +00005843 /* josh: One reason for setting the gtid specific data even when it is being
5844 destroyed by pthread is to allow gtid lookup through thread specific data
5845 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5846 that gets executed in the call to __kmp_internal_end_thread, actually
5847 gets the gtid through the thread specific data. Setting it here seems
5848 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5849 to run smoothly.
5850 todo: get rid of this after we remove the dependence on
5851 __kmp_gtid_get_specific */
5852 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5853 __kmp_gtid_set_specific(gtid);
5854#ifdef KMP_TDATA_GTID
5855 __kmp_gtid = gtid;
5856#endif
5857 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005858}
5859
Jonathan Peyton99016992015-05-26 17:32:53 +00005860#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005861
Jonathan Peyton30419822017-05-12 18:01:32 +00005862// 2009-09-08 (lev): It looks like the destructor does not work. In simple test
5863// cases destructors work perfectly, but in real libomp.so I have no evidence
5864// it is ever called. However, the -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005865
Jonathan Peyton30419822017-05-12 18:01:32 +00005866__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5867 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005868}
5869
Jonathan Peyton30419822017-05-12 18:01:32 +00005870void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005871
5872#endif
5873
Jonathan Peyton30419822017-05-12 18:01:32 +00005874/* [Windows] josh: when the atexit handler is called, there may still be more
5875 than one thread alive */
5876void __kmp_internal_end_atexit(void) {
5877 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5878 /* [Windows]
5879 josh: ideally, we want to completely shut down the library in this atexit
5880 handler, but stat code that depends on thread specific data for gtid fails
5881 because that data becomes unavailable at some point during the shutdown, so
5882 we call __kmp_internal_end_thread instead. We should eventually remove the
5883 dependency on __kmp_get_specific_gtid in the stat code and use
5884 __kmp_internal_end_library to cleanly shutdown the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005885
Jonathan Peyton30419822017-05-12 18:01:32 +00005886 // TODO: Can some of this comment about GVS be removed?
5887 I suspect that the offending stat code is executed when the calling thread
5888 tries to clean up a dead root thread's data structures, resulting in GVS
5889 code trying to close the GVS structures for that thread, but since the stat
5890 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5891 the calling thread is cleaning up itself instead of another thread, it gets
5892 confused. This happens because allowing a thread to unregister and clean up
5893 another thread is a recent modification for addressing an issue.
5894 Based on the current design (20050722), a thread may end up
5895 trying to unregister another thread only if thread death does not trigger
5896 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5897 thread specific data destructor function to detect thread death. For
5898 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5899 is nothing. Thus, the workaround is applicable only for Windows static
5900 stat library. */
5901 __kmp_internal_end_library(-1);
5902#if KMP_OS_WINDOWS
5903 __kmp_close_console();
5904#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005905}
5906
Jonathan Peyton30419822017-05-12 18:01:32 +00005907static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5908 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005909
Jonathan Peyton30419822017-05-12 18:01:32 +00005910 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005911
Jonathan Peyton30419822017-05-12 18:01:32 +00005912 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005913
Jonathan Peyton30419822017-05-12 18:01:32 +00005914 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005915
Jonathan Peyton30419822017-05-12 18:01:32 +00005916 if (!is_root) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005917 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5918 /* Assume the threads are at the fork barrier here */
5919 KA_TRACE(
5920 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5921 gtid));
5922 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5923 * (GEH) */
5924 ANNOTATE_HAPPENS_BEFORE(thread);
5925 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5926 __kmp_release_64(&flag);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005927 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005928
Jonathan Peyton30419822017-05-12 18:01:32 +00005929 // Terminate OS thread.
5930 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005931
Jonathan Peyton30419822017-05-12 18:01:32 +00005932 // The thread was killed asynchronously. If it was actively
5933 // spinning in the thread pool, decrement the global count.
5934 //
5935 // There is a small timing hole here - if the worker thread was just waking
5936 // up after sleeping in the pool, had reset its th_active_in_pool flag but
5937 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5938 // the global counter might not get updated.
5939 //
5940 // Currently, this can only happen as the library is unloaded,
5941 // so there are no harmful side effects.
5942 if (thread->th.th_active_in_pool) {
5943 thread->th.th_active_in_pool = FALSE;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005944 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5945 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
Jonathan Peyton30419822017-05-12 18:01:32 +00005946 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005947
Jonathan Peyton30419822017-05-12 18:01:32 +00005948 // Decrement # of [worker] threads in the pool.
5949 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5950 --__kmp_thread_pool_nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005951 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005952
Jonathan Peyton30419822017-05-12 18:01:32 +00005953 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005954
Jonathan Peyton30419822017-05-12 18:01:32 +00005955// Free the fast memory for tasking
5956#if USE_FAST_MEMORY
5957 __kmp_free_fast_memory(thread);
5958#endif /* USE_FAST_MEMORY */
5959
5960 __kmp_suspend_uninitialize_thread(thread);
5961
5962 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5963 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5964
5965 --__kmp_all_nth;
5966// __kmp_nth was decremented when thread is added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005967
5968#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005969 /* Adjust blocktime back to user setting or default if necessary */
5970 /* Middle initialization might never have occurred */
5971 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5972 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5973 if (__kmp_nth <= __kmp_avail_proc) {
5974 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005975 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005976 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005977#endif /* KMP_ADJUST_BLOCKTIME */
5978
Jonathan Peyton30419822017-05-12 18:01:32 +00005979 /* free the memory being used */
5980 if (__kmp_env_consistency_check) {
5981 if (thread->th.th_cons) {
5982 __kmp_free_cons_stack(thread->th.th_cons);
5983 thread->th.th_cons = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005984 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005985 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005986
Jonathan Peyton30419822017-05-12 18:01:32 +00005987 if (thread->th.th_pri_common != NULL) {
5988 __kmp_free(thread->th.th_pri_common);
5989 thread->th.th_pri_common = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005990 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005991
Jonathan Peyton30419822017-05-12 18:01:32 +00005992 if (thread->th.th_task_state_memo_stack != NULL) {
5993 __kmp_free(thread->th.th_task_state_memo_stack);
5994 thread->th.th_task_state_memo_stack = NULL;
5995 }
5996
5997#if KMP_USE_BGET
5998 if (thread->th.th_local.bget_data != NULL) {
5999 __kmp_finalize_bget(thread);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006000 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006001#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006002
Alp Toker98758b02014-03-02 04:12:06 +00006003#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006004 if (thread->th.th_affin_mask != NULL) {
6005 KMP_CPU_FREE(thread->th.th_affin_mask);
6006 thread->th.th_affin_mask = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006007 }
Alp Toker98758b02014-03-02 04:12:06 +00006008#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006009
Jonathan Peytonf6399362018-07-09 17:51:13 +00006010#if KMP_USE_HIER_SCHED
6011 if (thread->th.th_hier_bar_data != NULL) {
6012 __kmp_free(thread->th.th_hier_bar_data);
6013 thread->th.th_hier_bar_data = NULL;
6014 }
6015#endif
6016
Jonathan Peyton30419822017-05-12 18:01:32 +00006017 __kmp_reap_team(thread->th.th_serial_team);
6018 thread->th.th_serial_team = NULL;
6019 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006020
Jonathan Peyton30419822017-05-12 18:01:32 +00006021 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006022
6023} // __kmp_reap_thread
6024
Jonathan Peyton30419822017-05-12 18:01:32 +00006025static void __kmp_internal_end(void) {
6026 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006027
Jonathan Peyton30419822017-05-12 18:01:32 +00006028 /* First, unregister the library */
6029 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006030
Jonathan Peyton30419822017-05-12 18:01:32 +00006031#if KMP_OS_WINDOWS
6032 /* In Win static library, we can't tell when a root actually dies, so we
6033 reclaim the data structures for any root threads that have died but not
6034 unregistered themselves, in order to shut down cleanly.
6035 In Win dynamic library we also can't tell when a thread dies. */
6036 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
6037// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006038#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006039
Jonathan Peyton30419822017-05-12 18:01:32 +00006040 for (i = 0; i < __kmp_threads_capacity; i++)
6041 if (__kmp_root[i])
6042 if (__kmp_root[i]->r.r_active)
6043 break;
6044 KMP_MB(); /* Flush all pending memory write invalidates. */
6045 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6046
6047 if (i < __kmp_threads_capacity) {
6048#if KMP_USE_MONITOR
6049 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6050 KMP_MB(); /* Flush all pending memory write invalidates. */
6051
Jonathan Peyton94a114f2017-10-20 19:30:57 +00006052 // Need to check that monitor was initialized before reaping it. If we are
6053 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
6054 // __kmp_monitor will appear to contain valid data, but it is only valid in
6055 // the parent process, not the child.
Jonathan Peyton30419822017-05-12 18:01:32 +00006056 // New behavior (201008): instead of keying off of the flag
6057 // __kmp_init_parallel, the monitor thread creation is keyed off
6058 // of the new flag __kmp_init_monitor.
6059 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6060 if (TCR_4(__kmp_init_monitor)) {
6061 __kmp_reap_monitor(&__kmp_monitor);
6062 TCW_4(__kmp_init_monitor, 0);
6063 }
6064 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6065 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6066#endif // KMP_USE_MONITOR
6067 } else {
6068/* TODO move this to cleanup code */
6069#ifdef KMP_DEBUG
6070 /* make sure that everything has properly ended */
6071 for (i = 0; i < __kmp_threads_capacity; i++) {
6072 if (__kmp_root[i]) {
6073 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
6074 // there can be uber threads alive here
6075 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
6076 }
6077 }
6078#endif
6079
6080 KMP_MB();
6081
6082 // Reap the worker threads.
6083 // This is valid for now, but be careful if threads are reaped sooner.
6084 while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
6085 // Get the next thread from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00006086 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00006087 __kmp_thread_pool = thread->th.th_next_pool;
6088 // Reap it.
6089 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6090 thread->th.th_next_pool = NULL;
6091 thread->th.th_in_pool = FALSE;
6092 __kmp_reap_thread(thread, 0);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006093 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006094 __kmp_thread_pool_insert_pt = NULL;
6095
6096 // Reap teams.
6097 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
6098 // Get the next team from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00006099 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00006100 __kmp_team_pool = team->t.t_next_pool;
6101 // Reap it.
6102 team->t.t_next_pool = NULL;
6103 __kmp_reap_team(team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006104 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006105
6106 __kmp_reap_task_teams();
6107
Jonathan Peytona764af62018-07-19 19:17:00 +00006108#if KMP_OS_UNIX
6109 // Threads that are not reaped should not access any resources since they
6110 // are going to be deallocated soon, so the shutdown sequence should wait
6111 // until all threads either exit the final spin-waiting loop or begin
6112 // sleeping after the given blocktime.
6113 for (i = 0; i < __kmp_threads_capacity; i++) {
6114 kmp_info_t *thr = __kmp_threads[i];
6115 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6116 KMP_CPU_PAUSE();
6117 }
6118#endif
6119
Jonathan Peyton30419822017-05-12 18:01:32 +00006120 for (i = 0; i < __kmp_threads_capacity; ++i) {
6121 // TBD: Add some checking...
6122 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6123 }
6124
6125 /* Make sure all threadprivate destructors get run by joining with all
6126 worker threads before resetting this flag */
6127 TCW_SYNC_4(__kmp_init_common, FALSE);
6128
6129 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6130 KMP_MB();
6131
6132#if KMP_USE_MONITOR
6133 // See note above: One of the possible fixes for CQ138434 / CQ140126
6134 //
6135 // FIXME: push both code fragments down and CSE them?
6136 // push them into __kmp_cleanup() ?
6137 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6138 if (TCR_4(__kmp_init_monitor)) {
6139 __kmp_reap_monitor(&__kmp_monitor);
6140 TCW_4(__kmp_init_monitor, 0);
6141 }
6142 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6143 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6144#endif
6145 } /* else !__kmp_global.t_active */
6146 TCW_4(__kmp_init_gtid, FALSE);
6147 KMP_MB(); /* Flush all pending memory write invalidates. */
6148
6149 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006150#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006151 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006152#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006153}
6154
Jonathan Peyton30419822017-05-12 18:01:32 +00006155void __kmp_internal_end_library(int gtid_req) {
6156 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6157 /* this shouldn't be a race condition because __kmp_internal_end() is the
6158 only place to clear __kmp_serial_init */
6159 /* we'll check this later too, after we get the lock */
6160 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6161 // redundant, because the next check will work in any case.
6162 if (__kmp_global.g.g_abort) {
6163 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6164 /* TODO abort? */
6165 return;
6166 }
6167 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6168 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6169 return;
6170 }
6171
6172 KMP_MB(); /* Flush all pending memory write invalidates. */
6173
6174 /* find out who we are and what we should do */
6175 {
6176 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6177 KA_TRACE(
6178 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6179 if (gtid == KMP_GTID_SHUTDOWN) {
6180 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6181 "already shutdown\n"));
6182 return;
6183 } else if (gtid == KMP_GTID_MONITOR) {
6184 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6185 "registered, or system shutdown\n"));
6186 return;
6187 } else if (gtid == KMP_GTID_DNE) {
6188 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6189 "shutdown\n"));
6190 /* we don't know who we are, but we may still shutdown the library */
6191 } else if (KMP_UBER_GTID(gtid)) {
6192 /* unregister ourselves as an uber thread. gtid is no longer valid */
6193 if (__kmp_root[gtid]->r.r_active) {
6194 __kmp_global.g.g_abort = -1;
6195 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6196 KA_TRACE(10,
6197 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6198 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006199 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006200 } else {
6201 KA_TRACE(
6202 10,
6203 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6204 __kmp_unregister_root_current_thread(gtid);
6205 }
6206 } else {
6207/* worker threads may call this function through the atexit handler, if they
6208 * call exit() */
6209/* For now, skip the usual subsequent processing and just dump the debug buffer.
6210 TODO: do a thorough shutdown instead */
6211#ifdef DUMP_DEBUG_ON_EXIT
6212 if (__kmp_debug_buf)
6213 __kmp_dump_debug_buffer();
6214#endif
6215 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006216 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006217 }
6218 /* synchronize the termination process */
6219 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006220
Jonathan Peyton30419822017-05-12 18:01:32 +00006221 /* have we already finished */
6222 if (__kmp_global.g.g_abort) {
6223 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6224 /* TODO abort? */
6225 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6226 return;
6227 }
6228 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6229 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6230 return;
6231 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006232
Jonathan Peyton30419822017-05-12 18:01:32 +00006233 /* We need this lock to enforce mutex between this reading of
6234 __kmp_threads_capacity and the writing by __kmp_register_root.
6235 Alternatively, we can use a counter of roots that is atomically updated by
6236 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6237 __kmp_internal_end_*. */
6238 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006239
Jonathan Peyton30419822017-05-12 18:01:32 +00006240 /* now we can safely conduct the actual termination */
6241 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006242
Jonathan Peyton30419822017-05-12 18:01:32 +00006243 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6244 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006245
Jonathan Peyton30419822017-05-12 18:01:32 +00006246 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006247
Jonathan Peyton30419822017-05-12 18:01:32 +00006248#ifdef DUMP_DEBUG_ON_EXIT
6249 if (__kmp_debug_buf)
6250 __kmp_dump_debug_buffer();
6251#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006252
Jonathan Peyton30419822017-05-12 18:01:32 +00006253#if KMP_OS_WINDOWS
6254 __kmp_close_console();
6255#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006256
Jonathan Peyton30419822017-05-12 18:01:32 +00006257 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006258
6259} // __kmp_internal_end_library
6260
Jonathan Peyton30419822017-05-12 18:01:32 +00006261void __kmp_internal_end_thread(int gtid_req) {
6262 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006263
Jonathan Peyton30419822017-05-12 18:01:32 +00006264 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6265 /* this shouldn't be a race condition because __kmp_internal_end() is the
6266 * only place to clear __kmp_serial_init */
6267 /* we'll check this later too, after we get the lock */
6268 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6269 // redundant, because the next check will work in any case.
6270 if (__kmp_global.g.g_abort) {
6271 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6272 /* TODO abort? */
6273 return;
6274 }
6275 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6276 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6277 return;
6278 }
6279
6280 KMP_MB(); /* Flush all pending memory write invalidates. */
6281
6282 /* find out who we are and what we should do */
6283 {
6284 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6285 KA_TRACE(10,
6286 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6287 if (gtid == KMP_GTID_SHUTDOWN) {
6288 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6289 "already shutdown\n"));
6290 return;
6291 } else if (gtid == KMP_GTID_MONITOR) {
6292 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6293 "registered, or system shutdown\n"));
6294 return;
6295 } else if (gtid == KMP_GTID_DNE) {
6296 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6297 "shutdown\n"));
6298 return;
6299 /* we don't know who we are */
6300 } else if (KMP_UBER_GTID(gtid)) {
6301 /* unregister ourselves as an uber thread. gtid is no longer valid */
6302 if (__kmp_root[gtid]->r.r_active) {
6303 __kmp_global.g.g_abort = -1;
6304 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6305 KA_TRACE(10,
6306 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6307 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006308 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006309 } else {
6310 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6311 gtid));
6312 __kmp_unregister_root_current_thread(gtid);
6313 }
6314 } else {
6315 /* just a worker thread, let's leave */
6316 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6317
6318 if (gtid >= 0) {
6319 __kmp_threads[gtid]->th.th_task_team = NULL;
6320 }
6321
6322 KA_TRACE(10,
6323 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6324 gtid));
6325 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006326 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006327 }
Jonathan Peyton8b3842f2018-10-05 17:59:39 +00006328#if KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00006329 // AC: let's not shut down the Linux* OS dynamic library at the exit of an
6330 // uber thread; it is better to shut down later, in the library destructor.
6331 // The reason of this change is performance problem when non-openmp thread in
6332 // a loop forks and joins many openmp threads. We can save a lot of time
6333 // keeping worker threads alive until the program shutdown.
6334 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6335 // and Windows(DPD200287443) that occurs when using critical sections from
6336 // foreign threads.
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00006337 if (__kmp_pause_status != kmp_hard_paused) {
6338 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6339 return;
6340 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006341#endif
6342 /* synchronize the termination process */
6343 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006344
Jonathan Peyton30419822017-05-12 18:01:32 +00006345 /* have we already finished */
6346 if (__kmp_global.g.g_abort) {
6347 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6348 /* TODO abort? */
6349 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6350 return;
6351 }
6352 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6353 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6354 return;
6355 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006356
Jonathan Peyton30419822017-05-12 18:01:32 +00006357 /* We need this lock to enforce mutex between this reading of
6358 __kmp_threads_capacity and the writing by __kmp_register_root.
6359 Alternatively, we can use a counter of roots that is atomically updated by
6360 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6361 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006362
Jonathan Peyton30419822017-05-12 18:01:32 +00006363 /* should we finish the run-time? are all siblings done? */
6364 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006365
Jonathan Peyton30419822017-05-12 18:01:32 +00006366 for (i = 0; i < __kmp_threads_capacity; ++i) {
6367 if (KMP_UBER_GTID(i)) {
6368 KA_TRACE(
6369 10,
6370 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6371 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6372 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6373 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006374 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006375 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006376
Jonathan Peyton30419822017-05-12 18:01:32 +00006377 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006378
Jonathan Peyton30419822017-05-12 18:01:32 +00006379 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006380
Jonathan Peyton30419822017-05-12 18:01:32 +00006381 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6382 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006383
Jonathan Peyton30419822017-05-12 18:01:32 +00006384 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006385
Jonathan Peyton30419822017-05-12 18:01:32 +00006386#ifdef DUMP_DEBUG_ON_EXIT
6387 if (__kmp_debug_buf)
6388 __kmp_dump_debug_buffer();
6389#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006390} // __kmp_internal_end_thread
6391
Jonathan Peyton30419822017-05-12 18:01:32 +00006392// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006393// Library registration stuff.
6394
Jonathan Peyton30419822017-05-12 18:01:32 +00006395static long __kmp_registration_flag = 0;
6396// Random value used to indicate library initialization.
6397static char *__kmp_registration_str = NULL;
6398// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006399
Jonathan Peyton30419822017-05-12 18:01:32 +00006400static inline char *__kmp_reg_status_name() {
6401 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6402 each thread. If registration and unregistration go in different threads
6403 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6404 env var cannot be found, because the name will contain a different pid. */
6405 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006406} // __kmp_reg_status_name
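// An illustrative example of the registration handshake (all values below are
// hypothetical): for pid 12345 the variable name is
//   __KMP_REGISTERED_LIB_12345
// and the value written with the "%p-%lx-%s" format in
// __kmp_register_library_startup() looks like
//   0x7f1234567890-cafe1a2b-libomp.so
// i.e. the address of __kmp_registration_flag, the flag's value, and the
// library file name. A second copy of the runtime parses these fields back to
// decide whether the copy that registered them is still alive.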
6407
Jonathan Peyton30419822017-05-12 18:01:32 +00006408void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006409
Jonathan Peyton30419822017-05-12 18:01:32 +00006410 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6411 int done = 0;
6412 union {
6413 double dtime;
6414 long ltime;
6415 } time;
6416#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6417 __kmp_initialize_system_tick();
6418#endif
6419 __kmp_read_system_time(&time.dtime);
6420 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6421 __kmp_registration_str =
6422 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6423 __kmp_registration_flag, KMP_LIBRARY_FILE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006424
Jonathan Peyton30419822017-05-12 18:01:32 +00006425 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6426 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006427
Jonathan Peyton30419822017-05-12 18:01:32 +00006428 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006429
Jonathan Peyton30419822017-05-12 18:01:32 +00006430 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006431
Jonathan Peyton30419822017-05-12 18:01:32 +00006432 // Set the environment variable, but do not overwrite it if it already exists.
6433 __kmp_env_set(name, __kmp_registration_str, 0);
6434 // Check that the variable was actually written.
6435 value = __kmp_env_get(name);
6436 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006437
Jonathan Peyton30419822017-05-12 18:01:32 +00006438 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006439
Jonathan Peyton30419822017-05-12 18:01:32 +00006440 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006441
Jonathan Peyton30419822017-05-12 18:01:32 +00006442 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6443 // Check whether it is alive or dead.
6444 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6445 char *tail = value;
6446 char *flag_addr_str = NULL;
6447 char *flag_val_str = NULL;
6448 char const *file_name = NULL;
6449 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6450 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6451 file_name = tail;
6452 if (tail != NULL) {
6453 long *flag_addr = 0;
6454 long flag_val = 0;
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00006455 KMP_SSCANF(flag_addr_str, "%p", RCAST(void**, &flag_addr));
Jonathan Peyton30419822017-05-12 18:01:32 +00006456 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6457 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6458 // First, check whether environment-encoded address is mapped into
6459 // addr space.
6460 // If so, dereference it to see if it still has the right value.
6461 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6462 neighbor = 1;
6463 } else {
6464 // If not, then we know the other copy of the library is no longer
6465 // running.
6466 neighbor = 2;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006467 }
6468 }
6469 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006470 switch (neighbor) {
6471 case 0: // Cannot parse environment variable -- neighbor status unknown.
6472 // Assume it is the incompatible format of a future version of the
6473 // library. Assume the other library is alive.
6474 // WARN( ... ); // TODO: Issue a warning.
6475 file_name = "unknown library";
Joachim Protze0c599c32019-02-04 15:59:42 +00006476 KMP_FALLTHROUGH();
Jonathan Peyton30419822017-05-12 18:01:32 +00006477 // Attention! Falling through to the next case. That's intentional.
6478 case 1: { // Neighbor is alive.
6479 // Check it is allowed.
6480 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6481 if (!__kmp_str_match_true(duplicate_ok)) {
6482 // That's not allowed. Issue fatal error.
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006483 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6484 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006485 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006486 KMP_INTERNAL_FREE(duplicate_ok);
6487 __kmp_duplicate_library_ok = 1;
6488 done = 1; // Exit the loop.
6489 } break;
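// Usage note (illustrative): setting KMP_DUPLICATE_LIB_OK to a true value in
// the environment (e.g. KMP_DUPLICATE_LIB_OK=TRUE) turns the duplicate
// detection above from a fatal error into an allowed configuration.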
6490 case 2: { // Neighbor is dead.
6491 // Clear the variable and try to register library again.
6492 __kmp_env_unset(name);
6493 } break;
6494 default: { KMP_DEBUG_ASSERT(0); } break;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006495 }
6496 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006497 KMP_INTERNAL_FREE((void *)value);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006498 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006499 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006500
6501} // func __kmp_register_library_startup
6502
Jonathan Peyton30419822017-05-12 18:01:32 +00006503void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006504
Jonathan Peyton30419822017-05-12 18:01:32 +00006505 char *name = __kmp_reg_status_name();
6506 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006507
Jonathan Peyton30419822017-05-12 18:01:32 +00006508 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6509 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6510 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6511 // Ok, this is our variable. Delete it.
6512 __kmp_env_unset(name);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006513 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006514
Jonathan Peyton30419822017-05-12 18:01:32 +00006515 KMP_INTERNAL_FREE(__kmp_registration_str);
6516 KMP_INTERNAL_FREE(value);
6517 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006518
Jonathan Peyton30419822017-05-12 18:01:32 +00006519 __kmp_registration_flag = 0;
6520 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006521
6522} // __kmp_unregister_library
6523
Jim Cownie5e8470a2013-09-27 10:38:44 +00006524// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006525// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006526
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006527#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006528
Jonathan Peyton30419822017-05-12 18:01:32 +00006529static void __kmp_check_mic_type() {
6530 kmp_cpuid_t cpuid_state = {0};
6531 kmp_cpuid_t *cs_p = &cpuid_state;
6532 __kmp_x86_cpuid(1, 0, cs_p);
6533 // We don't support mic1 at the moment
6534 if ((cs_p->eax & 0xff0) == 0xB10) {
6535 __kmp_mic_type = mic2;
6536 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6537 __kmp_mic_type = mic3;
6538 } else {
6539 __kmp_mic_type = non_mic;
6540 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006541}
6542
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006543#endif /* KMP_MIC_SUPPORTED */
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006544
Jonathan Peyton30419822017-05-12 18:01:32 +00006545static void __kmp_do_serial_initialize(void) {
6546 int i, gtid;
6547 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006548
Jonathan Peyton30419822017-05-12 18:01:32 +00006549 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006550
Jonathan Peyton30419822017-05-12 18:01:32 +00006551 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6552 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6553 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6554 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6555 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006556
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006557#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006558 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006559#endif
6560
Jonathan Peyton30419822017-05-12 18:01:32 +00006561 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006562
Jonathan Peyton30419822017-05-12 18:01:32 +00006563 /* Initialize internal memory allocator */
6564 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006565
Jonathan Peyton30419822017-05-12 18:01:32 +00006566 /* Register the library startup via an environment variable and check to see
6567 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006568
Jonathan Peyton30419822017-05-12 18:01:32 +00006569 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006570
Jonathan Peyton30419822017-05-12 18:01:32 +00006571 /* TODO reinitialization of library */
6572 if (TCR_4(__kmp_global.g.g_done)) {
6573 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6574 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006575
Jonathan Peyton30419822017-05-12 18:01:32 +00006576 __kmp_global.g.g_abort = 0;
6577 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006578
Jonathan Peyton30419822017-05-12 18:01:32 +00006579/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006580#if KMP_USE_ADAPTIVE_LOCKS
6581#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006582 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006583#endif
6584#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006585#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006586 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006587#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006588 __kmp_init_lock(&__kmp_global_lock);
6589 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6590 __kmp_init_lock(&__kmp_debug_lock);
6591 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6592 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6593 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6594 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6595 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6596 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6597 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6598 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6599 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6600 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6601 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6602 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6603 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6604 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6605 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006606#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006607 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006608#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006609 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006610
Jonathan Peyton30419822017-05-12 18:01:32 +00006611 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006612
Jonathan Peyton30419822017-05-12 18:01:32 +00006613 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006614
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006615#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006616 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006617#endif
6618
Jonathan Peyton30419822017-05-12 18:01:32 +00006619// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006620#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006621 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006622#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006623 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006624
Jonathan Peyton30419822017-05-12 18:01:32 +00006625 // From __kmp_init_dflt_team_nth()
6626 /* assume the entire machine will be used */
6627 __kmp_dflt_team_nth_ub = __kmp_xproc;
6628 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6629 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6630 }
6631 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6632 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6633 }
6634 __kmp_max_nth = __kmp_sys_max_nth;
Jonathan Peytonf4392462017-07-27 20:58:41 +00006635 __kmp_cg_max_nth = __kmp_sys_max_nth;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00006636 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
6637 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6638 __kmp_teams_max_nth = __kmp_sys_max_nth;
6639 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006640
Jonathan Peyton30419822017-05-12 18:01:32 +00006641 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6642 // part
6643 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006644#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006645 __kmp_monitor_wakeups =
6646 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6647 __kmp_bt_intervals =
6648 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006649#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006650 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6651 __kmp_library = library_throughput;
6652 // From KMP_SCHEDULE initialization
6653 __kmp_static = kmp_sch_static_balanced;
6654// AC: do not use analytical here, because it is non-monotonous
6655//__kmp_guided = kmp_sch_guided_iterative_chunked;
6656//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6657// need to repeat assignment
6658// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
6659// bit control and barrier method control parts
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006660#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006661#define kmp_reduction_barrier_gather_bb ((int)1)
6662#define kmp_reduction_barrier_release_bb ((int)1)
6663#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6664#define kmp_reduction_barrier_release_pat bp_hyper_bar
6665#endif // KMP_FAST_REDUCTION_BARRIER
6666 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6667 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6668 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6669 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6670 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6671#if KMP_FAST_REDUCTION_BARRIER
6672 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6673 // lin_64 ): hyper,1
6674 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6675 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6676 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6677 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006678 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006679#endif // KMP_FAST_REDUCTION_BARRIER
6680 }
6681#if KMP_FAST_REDUCTION_BARRIER
6682#undef kmp_reduction_barrier_release_pat
6683#undef kmp_reduction_barrier_gather_pat
6684#undef kmp_reduction_barrier_release_bb
6685#undef kmp_reduction_barrier_gather_bb
6686#endif // KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006687#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006688 if (__kmp_mic_type == mic2) { // KNC
6689 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6690 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6691 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6692 1; // forkjoin release
6693 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6694 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6695 }
6696#if KMP_FAST_REDUCTION_BARRIER
6697 if (__kmp_mic_type == mic2) { // KNC
6698 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6699 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6700 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006701#endif // KMP_FAST_REDUCTION_BARRIER
6702#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006703
Jonathan Peyton30419822017-05-12 18:01:32 +00006704// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006705#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006706 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006707#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006708 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006709#endif
6710
Jonathan Peyton30419822017-05-12 18:01:32 +00006711 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6712 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006713
Jonathan Peyton30419822017-05-12 18:01:32 +00006714 __kmp_global.g.g_dynamic = FALSE;
6715 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006716
Jonathan Peyton30419822017-05-12 18:01:32 +00006717 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006718
Jonathan Peyton30419822017-05-12 18:01:32 +00006719// Print all messages in message catalog for testing purposes.
6720#ifdef KMP_DEBUG
6721 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6722 if (__kmp_str_match_true(val)) {
6723 kmp_str_buf_t buffer;
6724 __kmp_str_buf_init(&buffer);
6725 __kmp_i18n_dump_catalog(&buffer);
6726 __kmp_printf("%s", buffer.str);
6727 __kmp_str_buf_free(&buffer);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006728 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006729 __kmp_env_free(&val);
6730#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006731
Jonathan Peyton30419822017-05-12 18:01:32 +00006732 __kmp_threads_capacity =
6733 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6734 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6735 __kmp_tp_capacity = __kmp_default_tp_capacity(
6736 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006737
Jonathan Peyton30419822017-05-12 18:01:32 +00006738 // If the library is shut down properly, both pools must be NULL. Just in
6739 // case, set them to NULL -- some memory may leak, but subsequent code will
6740 // work even if pools are not freed.
6741 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6742 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6743 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6744 __kmp_thread_pool = NULL;
6745 __kmp_thread_pool_insert_pt = NULL;
6746 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006747
Jonathan Peyton30419822017-05-12 18:01:32 +00006748 /* Allocate all of the variable sized records */
6749 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6750 * expandable */
6751 /* Since allocation is cache-aligned, just add extra padding at the end */
6752 size =
6753 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6754 CACHE_LINE;
6755 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6756 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6757 sizeof(kmp_info_t *) * __kmp_threads_capacity);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006758
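 // Resulting layout of the single cache-aligned allocation (illustrative):
 //   [ kmp_info_t *[__kmp_threads_capacity] | kmp_root_t *[__kmp_threads_capacity] | CACHE_LINE pad ]
 // __kmp_threads points at the start of the block, __kmp_root at the second array.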
Jonathan Peyton30419822017-05-12 18:01:32 +00006759 /* init thread counts */
6760 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6761 0); // Asserts fail if the library is reinitializing and
6762 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6763 __kmp_all_nth = 0;
6764 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006765
Jonathan Peyton30419822017-05-12 18:01:32 +00006766 /* setup the uber master thread and hierarchy */
6767 gtid = __kmp_register_root(TRUE);
6768 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6769 KMP_ASSERT(KMP_UBER_GTID(gtid));
6770 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006771
Jonathan Peyton30419822017-05-12 18:01:32 +00006772 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006773
Jonathan Peyton30419822017-05-12 18:01:32 +00006774 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006775
Jonathan Peyton30419822017-05-12 18:01:32 +00006776#if KMP_OS_UNIX
6777 /* invoke the child fork handler */
6778 __kmp_register_atfork();
6779#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006780
Jonathan Peyton8b3842f2018-10-05 17:59:39 +00006781#if !KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00006782 {
6783 /* Invoke the exit handler when the program finishes, only for static
6784 library. For dynamic library, we already have _fini and DllMain. */
6785 int rc = atexit(__kmp_internal_end_atexit);
6786 if (rc != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006787 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6788 __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006789 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006790 }
6791#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006792
Jonathan Peyton30419822017-05-12 18:01:32 +00006793#if KMP_HANDLE_SIGNALS
6794#if KMP_OS_UNIX
6795 /* NOTE: make sure that this is called before the user installs their own
6796 signal handlers so that the user handlers are called first. This way they
6797 can return false, not call our handler, avoid terminating the library, and
6798 continue execution where they left off. */
6799 __kmp_install_signals(FALSE);
6800#endif /* KMP_OS_UNIX */
6801#if KMP_OS_WINDOWS
6802 __kmp_install_signals(TRUE);
6803#endif /* KMP_OS_WINDOWS */
6804#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006805
Jonathan Peyton30419822017-05-12 18:01:32 +00006806 /* we have finished the serial initialization */
6807 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006808
Jonathan Peyton30419822017-05-12 18:01:32 +00006809 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006810
Jonathan Peyton30419822017-05-12 18:01:32 +00006811 if (__kmp_settings) {
6812 __kmp_env_print();
6813 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006814
6815#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006816 if (__kmp_display_env || __kmp_display_env_verbose) {
6817 __kmp_env_print_2();
6818 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006819#endif // OMP_40_ENABLED
6820
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006821#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006822 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006823#endif
6824
Jonathan Peyton30419822017-05-12 18:01:32 +00006825 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006826
Jonathan Peyton30419822017-05-12 18:01:32 +00006827 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006828}
6829
Jonathan Peyton30419822017-05-12 18:01:32 +00006830void __kmp_serial_initialize(void) {
6831 if (__kmp_init_serial) {
6832 return;
6833 }
6834 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6835 if (__kmp_init_serial) {
6836 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6837 return;
6838 }
6839 __kmp_do_serial_initialize();
6840 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6841}
6842
6843static void __kmp_do_middle_initialize(void) {
6844 int i, j;
6845 int prev_dflt_team_nth;
6846
6847 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006848 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006849 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006850
Jonathan Peyton30419822017-05-12 18:01:32 +00006851 KA_TRACE(10, ("__kmp_do_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006852
Jonathan Peyton30419822017-05-12 18:01:32 +00006853 // Save the previous value for the __kmp_dflt_team_nth so that
6854 // we can avoid some reinitialization if it hasn't changed.
6855 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006856
Alp Toker98758b02014-03-02 04:12:06 +00006857#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006858 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6859 // number of cores on the machine.
6860 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006861
Jonathan Peyton30419822017-05-12 18:01:32 +00006862 // Run through the __kmp_threads array and set the affinity mask
6863 // for each root thread that is currently registered with the RTL.
6864 for (i = 0; i < __kmp_threads_capacity; i++) {
6865 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6866 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006867 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006868 }
Alp Toker98758b02014-03-02 04:12:06 +00006869#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006870
Jonathan Peyton30419822017-05-12 18:01:32 +00006871 KMP_ASSERT(__kmp_xproc > 0);
6872 if (__kmp_avail_proc == 0) {
6873 __kmp_avail_proc = __kmp_xproc;
6874 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006875
Jonathan Peyton30419822017-05-12 18:01:32 +00006876 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6877 // correct them now
6878 j = 0;
6879 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6880 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6881 __kmp_avail_proc;
6882 j++;
6883 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006884
Jonathan Peyton30419822017-05-12 18:01:32 +00006885 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006886#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006887 // Default #threads = #cores
6888 __kmp_dflt_team_nth = __kmp_ncores;
6889 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6890 "__kmp_ncores (%d)\n",
6891 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006892#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006893 // Default #threads = #available OS procs
6894 __kmp_dflt_team_nth = __kmp_avail_proc;
6895 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6896 "__kmp_avail_proc(%d)\n",
6897 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006898#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006899 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006900
Jonathan Peyton30419822017-05-12 18:01:32 +00006901 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6902 __kmp_dflt_team_nth = KMP_MIN_NTH;
6903 }
6904 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6905 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6906 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006907
Jonathan Peyton30419822017-05-12 18:01:32 +00006908 // There's no harm in continuing if the following check fails,
6909 // but it indicates an error in the previous logic.
6910 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006911
Jonathan Peyton30419822017-05-12 18:01:32 +00006912 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6913 // Run through the __kmp_threads array and set the num threads icv for each
6914 // root thread that is currently registered with the RTL (which has not
6915 // already explicitly set its nthreads-var with a call to
6916 // omp_set_num_threads()).
6917 for (i = 0; i < __kmp_threads_capacity; i++) {
6918 kmp_info_t *thread = __kmp_threads[i];
6919 if (thread == NULL)
6920 continue;
6921 if (thread->th.th_current_task->td_icvs.nproc != 0)
6922 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006923
Jonathan Peyton30419822017-05-12 18:01:32 +00006924 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006925 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006926 }
6927 KA_TRACE(
6928 20,
6929 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6930 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006931
6932#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006933 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6934 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6935 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6936 if (__kmp_nth > __kmp_avail_proc) {
6937 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006938 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006939 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006940#endif /* KMP_ADJUST_BLOCKTIME */
6941
Jonathan Peyton30419822017-05-12 18:01:32 +00006942 /* we have finished middle initialization */
6943 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006944
Jonathan Peyton30419822017-05-12 18:01:32 +00006945 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006946}
6947
Jonathan Peyton30419822017-05-12 18:01:32 +00006948void __kmp_middle_initialize(void) {
6949 if (__kmp_init_middle) {
6950 return;
6951 }
6952 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6953 if (__kmp_init_middle) {
6954 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6955 return;
6956 }
6957 __kmp_do_middle_initialize();
6958 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6959}
6960
6961void __kmp_parallel_initialize(void) {
6962 int gtid = __kmp_entry_gtid(); // this might be a new root
6963
6964 /* synchronize parallel initialization (for sibling) */
6965 if (TCR_4(__kmp_init_parallel))
6966 return;
6967 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6968 if (TCR_4(__kmp_init_parallel)) {
6969 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6970 return;
6971 }
6972
6973 /* TODO reinitialization after we have already shut down */
6974 if (TCR_4(__kmp_global.g.g_done)) {
6975 KA_TRACE(
6976 10,
6977 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6978 __kmp_infinite_loop();
6979 }
6980
6981 /* jc: The lock __kmp_initz_lock is already held, so calling
6982 __kmp_serial_initialize would cause a deadlock. So we call
6983 __kmp_do_serial_initialize directly. */
6984 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006985 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006986 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006987
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00006988#if OMP_50_ENABLED
6989 __kmp_resume_if_hard_paused();
6990#endif
6991
Jonathan Peyton30419822017-05-12 18:01:32 +00006992 /* begin initialization */
6993 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6994 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006995
6996#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006997 // Save the FP control regs.
6998 // Worker threads will set theirs to these values at thread startup.
6999 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7000 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7001 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007002#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7003
7004#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00007005#if KMP_HANDLE_SIGNALS
7006 /* must be after __kmp_serial_initialize */
7007 __kmp_install_signals(TRUE);
7008#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007009#endif
7010
Jonathan Peyton30419822017-05-12 18:01:32 +00007011 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007012
Jonathan Peyton749b4d52016-01-27 21:02:04 +00007013#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00007014 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7015 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7016 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007017#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007018 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7019 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7020 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007021#endif
7022
Jonathan Peyton30419822017-05-12 18:01:32 +00007023 if (__kmp_version) {
7024 __kmp_print_version_2();
7025 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007026
Jonathan Peyton30419822017-05-12 18:01:32 +00007027 /* we have finished parallel initialization */
7028 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007029
Jonathan Peyton30419822017-05-12 18:01:32 +00007030 KMP_MB();
7031 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007032
Jonathan Peyton30419822017-05-12 18:01:32 +00007033 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007034}
7035
Jim Cownie5e8470a2013-09-27 10:38:44 +00007036/* ------------------------------------------------------------------------ */
7037
Jonathan Peyton30419822017-05-12 18:01:32 +00007038void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7039 kmp_team_t *team) {
7040 kmp_disp_t *dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007041
Jonathan Peyton30419822017-05-12 18:01:32 +00007042 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007043
Jonathan Peyton30419822017-05-12 18:01:32 +00007044 /* none of the threads have encountered any constructs, yet. */
7045 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007046#if KMP_CACHE_MANAGE
Jonathan Peyton30419822017-05-12 18:01:32 +00007047 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007048#endif /* KMP_CACHE_MANAGE */
Jonathan Peyton30419822017-05-12 18:01:32 +00007049 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7050 KMP_DEBUG_ASSERT(dispatch);
7051 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7052 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
7053 // this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00007054
Jonathan Peyton30419822017-05-12 18:01:32 +00007055 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007056#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007057 dispatch->th_doacross_buf_idx =
7058 0; /* reset the doacross dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00007059#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007060 if (__kmp_env_consistency_check)
7061 __kmp_push_parallel(gtid, team->t.t_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007062
Jonathan Peyton30419822017-05-12 18:01:32 +00007063 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007064}
7065
Jonathan Peyton30419822017-05-12 18:01:32 +00007066void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7067 kmp_team_t *team) {
7068 if (__kmp_env_consistency_check)
7069 __kmp_pop_parallel(gtid, team->t.t_ident);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00007070
Jonathan Peyton30419822017-05-12 18:01:32 +00007071 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007072}
7073
Jonathan Peyton30419822017-05-12 18:01:32 +00007074int __kmp_invoke_task_func(int gtid) {
7075 int rc;
7076 int tid = __kmp_tid_from_gtid(gtid);
7077 kmp_info_t *this_thr = __kmp_threads[gtid];
7078 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007079
Jonathan Peyton30419822017-05-12 18:01:32 +00007080 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007081#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00007082 if (__itt_stack_caller_create_ptr) {
7083 __kmp_itt_stack_callee_enter(
7084 (__itt_caller)
7085 team->t.t_stack_id); // inform ittnotify about entering user's code
7086 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007087#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007088#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007089 SSC_MARK_INVOKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007090#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007091
7092#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007093 void *dummy;
7094 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00007095 ompt_data_t *my_task_data;
7096 ompt_data_t *my_parallel_data;
7097 int ompt_team_size;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007098
Joachim Protze82e94a52017-11-01 10:08:30 +00007099 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00007100 exit_runtime_p = &(
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00007101 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00007102 } else {
7103 exit_runtime_p = &dummy;
7104 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007105
Joachim Protze82e94a52017-11-01 10:08:30 +00007106 my_task_data =
7107 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7108 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7109 if (ompt_enabled.ompt_callback_implicit_task) {
7110 ompt_team_size = team->t.t_nproc;
7111 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7112 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
Joachim Protze2b46d302019-01-15 15:36:53 +00007113 __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze9be9cf22018-05-07 12:42:21 +00007114 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00007115 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007116#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007117
Jonathan Peyton30419822017-05-12 18:01:32 +00007118 {
7119 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
7120 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
7121 rc =
7122 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7123 tid, (int)team->t.t_argc, (void **)team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007124#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007125 ,
7126 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007127#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007128 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00007129#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007130 *exit_runtime_p = NULL;
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00007131#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007132 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00007133
Jim Cownie5e8470a2013-09-27 10:38:44 +00007134#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00007135 if (__itt_stack_caller_create_ptr) {
7136 __kmp_itt_stack_callee_leave(
7137 (__itt_caller)
7138 team->t.t_stack_id); // inform ittnotify about leaving user's code
7139 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007140#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00007141 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007142
Jonathan Peyton30419822017-05-12 18:01:32 +00007143 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007144}
7145
7146#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007147void __kmp_teams_master(int gtid) {
7148 // This routine is called by all master threads in teams construct
7149 kmp_info_t *thr = __kmp_threads[gtid];
7150 kmp_team_t *team = thr->th.th_team;
7151 ident_t *loc = team->t.t_ident;
7152 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7153 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7154 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7155 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7156 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00007157
7158 // This thread is a new CG root. Set up the proper variables.
7159 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7160 tmp->cg_root = thr; // Make thr the CG root
7161 // Init to thread limit that was stored when league masters were forked
7162 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7163 tmp->cg_nthreads = 1; // Init counter to one active thread, this one
7164 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7165 " cg_threads to 1\n",
7166 thr, tmp));
7167 tmp->up = thr->th.th_cg_roots;
7168 thr->th.th_cg_roots = tmp;
7169
Jonathan Peyton30419822017-05-12 18:01:32 +00007170// Launch league of teams now, but do not let workers execute
7171// (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007172#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007173 SSC_MARK_FORKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007174#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007175 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
Jonathan Peyton30419822017-05-12 18:01:32 +00007176 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7177 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007178#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007179 SSC_MARK_JOINING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007180#endif
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00007181 // If the team size was reduced from the limit, set it to the new size
7182 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7183 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00007184 // AC: last parameter "1" eliminates join barrier which won't work because
7185 // worker threads are in a fork barrier waiting for more parallel regions
7186 __kmp_join_call(loc, gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007187#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007188 ,
7189 fork_context_intel
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007190#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007191 ,
7192 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007193}
7194
Jonathan Peyton30419822017-05-12 18:01:32 +00007195int __kmp_invoke_teams_master(int gtid) {
7196 kmp_info_t *this_thr = __kmp_threads[gtid];
7197 kmp_team_t *team = this_thr->th.th_team;
7198#if KMP_DEBUG
7199 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7200 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7201 (void *)__kmp_teams_master);
7202#endif
7203 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7204 __kmp_teams_master(gtid);
7205 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7206 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007207}
7208#endif /* OMP_40_ENABLED */
7209
7210/* this sets the requested number of threads for the next parallel region
Jonathan Peyton30419822017-05-12 18:01:32 +00007211 encountered by this team. Since this should be enclosed in the forkjoin
7212 critical section, it should avoid race conditions with asymmetrical nested
7213 parallelism */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007214
Jonathan Peyton30419822017-05-12 18:01:32 +00007215void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7216 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007217
Jonathan Peyton30419822017-05-12 18:01:32 +00007218 if (num_threads > 0)
7219 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007220}
7221
7222#if OMP_40_ENABLED
7223
7224/* this sets the requested number of teams for the teams region and/or
Jonathan Peyton30419822017-05-12 18:01:32 +00007225 the number of threads for the next parallel region encountered */
7226void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7227 int num_threads) {
7228 kmp_info_t *thr = __kmp_threads[gtid];
7229 KMP_DEBUG_ASSERT(num_teams >= 0);
7230 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007231
Jonathan Peyton30419822017-05-12 18:01:32 +00007232 if (num_teams == 0)
7233 num_teams = 1; // default number of teams is 1.
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007234 if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
Jonathan Peyton30419822017-05-12 18:01:32 +00007235 if (!__kmp_reserve_warn) {
7236 __kmp_reserve_warn = 1;
7237 __kmp_msg(kmp_ms_warning,
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007238 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
Jonathan Peyton30419822017-05-12 18:01:32 +00007239 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007240 }
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007241 num_teams = __kmp_teams_max_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007242 }
7243 // Set number of teams (number of threads in the outer "parallel" of the
7244 // teams)
7245 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007246
Jonathan Peyton30419822017-05-12 18:01:32 +00007247 // Remember the number of threads for inner parallel regions
7248 if (num_threads == 0) {
7249 if (!TCR_4(__kmp_init_middle))
7250 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
7251 num_threads = __kmp_avail_proc / num_teams;
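 // Illustrative (made-up numbers): with __kmp_avail_proc = 64 and
 // num_teams = 4, each team would get 16 threads, subject to the
 // __kmp_teams_max_nth cap checked just below.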
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007252 if (num_teams * num_threads > __kmp_teams_max_nth) {
Jonathan Peyton30419822017-05-12 18:01:32 +00007253 // adjust num_threads w/o warning as it is not user setting
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007254 num_threads = __kmp_teams_max_nth / num_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007255 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007256 } else {
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00007257 // This thread will be the master of the league masters
7258 // Store new thread limit; old limit is saved in th_cg_roots list
7259 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7260
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007261 if (num_teams * num_threads > __kmp_teams_max_nth) {
7262 int new_threads = __kmp_teams_max_nth / num_teams;
Jonathan Peyton30419822017-05-12 18:01:32 +00007263 if (!__kmp_reserve_warn) { // user asked for too many threads
Jonathan Peyton65ebfee2019-02-11 21:04:23 +00007264 __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
Jonathan Peyton30419822017-05-12 18:01:32 +00007265 __kmp_msg(kmp_ms_warning,
7266 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7267 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7268 }
7269 num_threads = new_threads;
7270 }
7271 }
7272 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007273}
7274
Jim Cownie5e8470a2013-09-27 10:38:44 +00007275// Set the proc_bind var to use in the following parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007276void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7277 kmp_info_t *thr = __kmp_threads[gtid];
7278 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007279}
7280
7281#endif /* OMP_40_ENABLED */
7282
7283/* Launch the worker threads into the microtask. */
7284
Jonathan Peyton30419822017-05-12 18:01:32 +00007285void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7286 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007287
7288#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007289 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007290#endif /* KMP_DEBUG */
7291
Jonathan Peyton30419822017-05-12 18:01:32 +00007292 KMP_DEBUG_ASSERT(team);
7293 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7294 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7295 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007296
Jonathan Peyton30419822017-05-12 18:01:32 +00007297 team->t.t_construct = 0; /* no single directives seen yet */
7298 team->t.t_ordered.dt.t_value =
7299 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007300
Jonathan Peyton30419822017-05-12 18:01:32 +00007301 /* Reset the identifiers on the dispatch buffer */
7302 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7303 if (team->t.t_max_nproc > 1) {
7304 int i;
7305 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7306 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007307#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007308 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007309#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007310 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007311 } else {
7312 team->t.t_disp_buffer[0].buffer_index = 0;
7313#if OMP_45_ENABLED
7314 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7315#endif
7316 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007317
Jonathan Peyton30419822017-05-12 18:01:32 +00007318 KMP_MB(); /* Flush all pending memory write invalidates. */
7319 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007320
7321#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007322 for (f = 0; f < team->t.t_nproc; f++) {
7323 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7324 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7325 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007326#endif /* KMP_DEBUG */
7327
Jonathan Peyton30419822017-05-12 18:01:32 +00007328 /* release the worker threads so they may begin working */
7329 __kmp_fork_barrier(gtid, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007330}
7331
Jonathan Peyton30419822017-05-12 18:01:32 +00007332void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7333 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007334
Jonathan Peyton30419822017-05-12 18:01:32 +00007335 KMP_DEBUG_ASSERT(team);
7336 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7337 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7338 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007339
Jonathan Peyton30419822017-05-12 18:01:32 +00007340/* Join barrier after fork */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007341
7342#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007343 if (__kmp_threads[gtid] &&
7344 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7345 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7346 __kmp_threads[gtid]);
7347 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7348 "team->t.t_nproc=%d\n",
7349 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7350 team->t.t_nproc);
7351 __kmp_print_structure();
7352 }
7353 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7354 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007355#endif /* KMP_DEBUG */
7356
Jonathan Peyton30419822017-05-12 18:01:32 +00007357 __kmp_join_barrier(gtid); /* wait for everyone */
Joachim Protze82e94a52017-11-01 10:08:30 +00007358#if OMPT_SUPPORT
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007359 if (ompt_enabled.enabled &&
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00007360 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007361 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7362 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
Joachim Protze0e0d6cd2018-12-18 08:52:30 +00007363 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
Joachim Protze82e94a52017-11-01 10:08:30 +00007364#if OMPT_OPTIONAL
7365 void *codeptr = NULL;
7366 if (KMP_MASTER_TID(ds_tid) &&
7367 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7368 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7369 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7370
7371 if (ompt_enabled.ompt_callback_sync_region_wait) {
7372 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
Jonathan Peytonad1ad7a2019-02-28 20:55:39 +00007373 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7374 codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007375 }
7376 if (ompt_enabled.ompt_callback_sync_region) {
7377 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
Jonathan Peytonad1ad7a2019-02-28 20:55:39 +00007378 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7379 codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007380 }
7381#endif
7382 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7383 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
Joachim Protze2b46d302019-01-15 15:36:53 +00007384 ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial?
Joachim Protze82e94a52017-11-01 10:08:30 +00007385 }
Joachim Protze82e94a52017-11-01 10:08:30 +00007386 }
7387#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007388
Jonathan Peyton30419822017-05-12 18:01:32 +00007389 KMP_MB(); /* Flush all pending memory write invalidates. */
7390 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007391}
7392
Jim Cownie5e8470a2013-09-27 10:38:44 +00007393/* ------------------------------------------------------------------------ */
7394
7395#ifdef USE_LOAD_BALANCE
7396
Jim Cownie5e8470a2013-09-27 10:38:44 +00007397// Return the worker threads actively spinning in the hot team, if we
7398// are at the outermost level of parallelism. Otherwise, return 0.
Jonathan Peyton30419822017-05-12 18:01:32 +00007399static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7400 int i;
7401 int retval;
7402 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007403
Jonathan Peyton30419822017-05-12 18:01:32 +00007404 if (root->r.r_active) {
7405 return 0;
7406 }
7407 hot_team = root->r.r_hot_team;
7408 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7409 return hot_team->t.t_nproc - 1; // Don't count master thread
7410 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007411
Jonathan Peyton30419822017-05-12 18:01:32 +00007412 // Skip the master thread - it is accounted for elsewhere.
7413 retval = 0;
7414 for (i = 1; i < hot_team->t.t_nproc; i++) {
7415 if (hot_team->t.t_threads[i]->th.th_active) {
7416 retval++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007417 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007418 }
7419 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007420}
7421
Jim Cownie5e8470a2013-09-27 10:38:44 +00007422// Perform an automatic adjustment to the number of
7423// threads used by the next parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007424static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7425 int retval;
7426 int pool_active;
7427 int hot_team_active;
7428 int team_curr_active;
7429 int system_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007430
Jonathan Peyton30419822017-05-12 18:01:32 +00007431 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7432 set_nproc));
7433 KMP_DEBUG_ASSERT(root);
7434 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7435 ->th.th_current_task->td_icvs.dynamic == TRUE);
7436 KMP_DEBUG_ASSERT(set_nproc > 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007437
Jonathan Peyton30419822017-05-12 18:01:32 +00007438 if (set_nproc == 1) {
7439 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
7440 return 1;
7441 }
7442
7443 // Threads that are active in the thread pool, active in the hot team for this
7444 // particular root (if we are at the outer par level), and the currently
7445 // executing thread (to become the master) are available to add to the new
7446 // team, but are currently contributing to the system load, and must be
7447 // accounted for.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00007448 pool_active = __kmp_thread_pool_active_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007449 hot_team_active = __kmp_active_hot_team_nproc(root);
7450 team_curr_active = pool_active + hot_team_active + 1;
7451
7452 // Check the system load.
7453 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7454 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
7455 "hot team active = %d\n",
7456 system_active, pool_active, hot_team_active));
7457
7458 if (system_active < 0) {
7459 // There was an error reading the necessary info from /proc, so use the
7460 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
7461 // = dynamic_thread_limit, we shouldn't wind up getting back here.
7462 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7463 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7464
7465 // Make this call behave like the thread limit algorithm.
7466 retval = __kmp_avail_proc - __kmp_nth +
7467 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7468 if (retval > set_nproc) {
7469 retval = set_nproc;
7470 }
7471 if (retval < KMP_MIN_NTH) {
7472 retval = KMP_MIN_NTH;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007473 }
7474
Jonathan Peyton30419822017-05-12 18:01:32 +00007475 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7476 retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007477 return retval;
Jonathan Peyton30419822017-05-12 18:01:32 +00007478 }
7479
7480 // There is a slight delay in the load balance algorithm in detecting new
7481 // running procs. The real system load at this instant should be at least as
7482 // large as the #active omp thread that are available to add to the team.
7483 if (system_active < team_curr_active) {
7484 system_active = team_curr_active;
7485 }
7486 retval = __kmp_avail_proc - system_active + team_curr_active;
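 // Illustrative arithmetic (made-up numbers): with __kmp_avail_proc = 16,
 // system_active = 12 and team_curr_active = 5, retval = 16 - 12 + 5 = 9,
 // which the clamps below then bound by set_nproc and KMP_MIN_NTH.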
7487 if (retval > set_nproc) {
7488 retval = set_nproc;
7489 }
7490 if (retval < KMP_MIN_NTH) {
7491 retval = KMP_MIN_NTH;
7492 }
7493
7494 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7495 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007496} // __kmp_load_balance_nproc()
7497
7498#endif /* USE_LOAD_BALANCE */
7499
Jim Cownie5e8470a2013-09-27 10:38:44 +00007500/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007501
7502/* NOTE: this is called with the __kmp_init_lock held */
Jonathan Peyton30419822017-05-12 18:01:32 +00007503void __kmp_cleanup(void) {
7504 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007505
Jonathan Peyton30419822017-05-12 18:01:32 +00007506 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007507
Jonathan Peyton30419822017-05-12 18:01:32 +00007508 if (TCR_4(__kmp_init_parallel)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007509#if KMP_HANDLE_SIGNALS
Jonathan Peyton30419822017-05-12 18:01:32 +00007510 __kmp_remove_signals();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007511#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007512 TCW_4(__kmp_init_parallel, FALSE);
7513 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007514
Jonathan Peyton30419822017-05-12 18:01:32 +00007515 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007516#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007517 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007518#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton30419822017-05-12 18:01:32 +00007519 __kmp_cleanup_hierarchy();
7520 TCW_4(__kmp_init_middle, FALSE);
7521 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007522
Jonathan Peyton30419822017-05-12 18:01:32 +00007523 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007524
Jonathan Peyton30419822017-05-12 18:01:32 +00007525 if (__kmp_init_serial) {
7526 __kmp_runtime_destroy();
7527 __kmp_init_serial = FALSE;
7528 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007529
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00007530 __kmp_cleanup_threadprivate_caches();
7531
Jonathan Peyton30419822017-05-12 18:01:32 +00007532 for (f = 0; f < __kmp_threads_capacity; f++) {
7533 if (__kmp_root[f] != NULL) {
7534 __kmp_free(__kmp_root[f]);
7535 __kmp_root[f] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007536 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007537 }
7538 __kmp_free(__kmp_threads);
7539 // __kmp_threads and __kmp_root were allocated at once, as single block, so
7540 // there is no need in freeing __kmp_root.
7541 __kmp_threads = NULL;
7542 __kmp_root = NULL;
7543 __kmp_threads_capacity = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007544
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007545#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00007546 __kmp_cleanup_indirect_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007547#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007548 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007549#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007550
Jonathan Peyton30419822017-05-12 18:01:32 +00007551#if KMP_AFFINITY_SUPPORTED
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00007552 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
Jonathan Peyton30419822017-05-12 18:01:32 +00007553 __kmp_cpuinfo_file = NULL;
7554#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007555
Jonathan Peyton30419822017-05-12 18:01:32 +00007556#if KMP_USE_ADAPTIVE_LOCKS
7557#if KMP_DEBUG_ADAPTIVE_LOCKS
7558 __kmp_print_speculative_stats();
7559#endif
7560#endif
7561 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7562 __kmp_nested_nth.nth = NULL;
7563 __kmp_nested_nth.size = 0;
7564 __kmp_nested_nth.used = 0;
7565 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7566 __kmp_nested_proc_bind.bind_types = NULL;
7567 __kmp_nested_proc_bind.size = 0;
7568 __kmp_nested_proc_bind.used = 0;
Jonathan Peyton6d88e042018-12-13 23:14:24 +00007569#if OMP_50_ENABLED
7570 if (__kmp_affinity_format) {
7571 KMP_INTERNAL_FREE(__kmp_affinity_format);
7572 __kmp_affinity_format = NULL;
7573 }
7574#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007575
Jonathan Peyton30419822017-05-12 18:01:32 +00007576 __kmp_i18n_catclose();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007577
Jonathan Peytonf6399362018-07-09 17:51:13 +00007578#if KMP_USE_HIER_SCHED
7579 __kmp_hier_scheds.deallocate();
7580#endif
7581
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007582#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007583 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007584#endif
7585
Jonathan Peyton30419822017-05-12 18:01:32 +00007586 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007587}
7588
7589/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007590
7591int __kmp_ignore_mppbeg(void) {
7592 char *env;
7593
7594 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
7595 if (__kmp_str_match_false(env))
7596 return FALSE;
7597 }
7598 // By default __kmpc_begin() is no-op.
7599 return TRUE;
7600}
7601
7602int __kmp_ignore_mppend(void) {
7603 char *env;
7604
7605 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
7606 if (__kmp_str_match_false(env))
7607 return FALSE;
7608 }
7609 // By default __kmpc_end() is no-op.
7610 return TRUE;
7611}
7612
7613void __kmp_internal_begin(void) {
7614 int gtid;
7615 kmp_root_t *root;
7616
7617 /* this is a very important step as it will register new sibling threads
7618 and assign these new uber threads a new gtid */
7619 gtid = __kmp_entry_gtid();
7620 root = __kmp_threads[gtid]->th.th_root;
7621 KMP_ASSERT(KMP_UBER_GTID(gtid));
7622
7623 if (root->r.r_begin)
7624 return;
7625 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7626 if (root->r.r_begin) {
7627 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7628 return;
7629 }
7630
7631 root->r.r_begin = TRUE;
7632
7633 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7634}
7635
Jim Cownie5e8470a2013-09-27 10:38:44 +00007636/* ------------------------------------------------------------------------ */
7637
Jonathan Peyton30419822017-05-12 18:01:32 +00007638void __kmp_user_set_library(enum library_type arg) {
7639 int gtid;
7640 kmp_root_t *root;
7641 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007642
Jonathan Peyton30419822017-05-12 18:01:32 +00007643 /* first, make sure we are initialized so we can get our gtid */
7644
7645 gtid = __kmp_entry_gtid();
7646 thread = __kmp_threads[gtid];
7647
7648 root = thread->th.th_root;
7649
7650 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7651 library_serial));
7652 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
7653 thread */
7654 KMP_WARNING(SetLibraryIncorrectCall);
7655 return;
7656 }
7657
7658 switch (arg) {
7659 case library_serial:
7660 thread->th.th_set_nproc = 0;
7661 set__nproc(thread, 1);
7662 break;
7663 case library_turnaround:
7664 thread->th.th_set_nproc = 0;
7665 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7666 : __kmp_dflt_team_nth_ub);
7667 break;
7668 case library_throughput:
7669 thread->th.th_set_nproc = 0;
7670 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7671 : __kmp_dflt_team_nth_ub);
7672 break;
7673 default:
7674 KMP_FATAL(UnknownLibraryType, arg);
7675 }
7676
7677 __kmp_aux_set_library(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007678}
7679
Jonathan Peyton30419822017-05-12 18:01:32 +00007680void __kmp_aux_set_stacksize(size_t arg) {
7681 if (!__kmp_init_serial)
7682 __kmp_serial_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007683
7684#if KMP_OS_DARWIN
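 // 0x1000 == 4096: round the requested stack size up to a 4 KiB boundary.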
Jonathan Peyton30419822017-05-12 18:01:32 +00007685 if (arg & (0x1000 - 1)) {
7686 arg &= ~(0x1000 - 1);
7687 if (arg + 0x1000) /* check for overflow if we round up */
7688 arg += 0x1000;
7689 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007690#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007691 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007692
Jonathan Peyton30419822017-05-12 18:01:32 +00007693 /* only change the default stacksize before the first parallel region */
7694 if (!TCR_4(__kmp_init_parallel)) {
7695 size_t value = arg; /* argument is in bytes */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007696
Jonathan Peyton30419822017-05-12 18:01:32 +00007697 if (value < __kmp_sys_min_stksize)
7698 value = __kmp_sys_min_stksize;
7699 else if (value > KMP_MAX_STKSIZE)
7700 value = KMP_MAX_STKSIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007701
Jonathan Peyton30419822017-05-12 18:01:32 +00007702 __kmp_stksize = value;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007703
Jonathan Peyton30419822017-05-12 18:01:32 +00007704 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7705 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007706
Jonathan Peyton30419822017-05-12 18:01:32 +00007707 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007708}
7709
7710/* set the behaviour of the runtime library */
7711/* TODO this can cause some odd behaviour with sibling parallelism... */
Jonathan Peyton30419822017-05-12 18:01:32 +00007712void __kmp_aux_set_library(enum library_type arg) {
7713 __kmp_library = arg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007714
Jonathan Peyton30419822017-05-12 18:01:32 +00007715 switch (__kmp_library) {
7716 case library_serial: {
7717 KMP_INFORM(LibraryIsSerial);
Jonathan Peyton30419822017-05-12 18:01:32 +00007718 } break;
7719 case library_turnaround:
Jonathan Peytone47d32f2019-02-28 19:11:29 +00007720 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
7721 __kmp_use_yield = 2; // only yield when oversubscribed
Jonathan Peyton30419822017-05-12 18:01:32 +00007722 break;
7723 case library_throughput:
Jonathan Peytone47d32f2019-02-28 19:11:29 +00007724 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
7725 __kmp_dflt_blocktime = 200;
Jonathan Peyton30419822017-05-12 18:01:32 +00007726 break;
7727 default:
7728 KMP_FATAL(UnknownLibraryType, arg);
7729 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007730}
7731
Jonathan Peyton6d88e042018-12-13 23:14:24 +00007732/* Getting team information common for all team API */
7733// Returns NULL if not in teams construct
7734static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
7735 kmp_info_t *thr = __kmp_entry_thread();
7736 teams_serialized = 0;
7737 if (thr->th.th_teams_microtask) {
7738 kmp_team_t *team = thr->th.th_team;
7739 int tlevel = thr->th.th_teams_level; // the level of the teams construct
7740 int ii = team->t.t_level;
7741 teams_serialized = team->t.t_serialized;
7742 int level = tlevel + 1;
7743 KMP_DEBUG_ASSERT(ii >= tlevel);
7744 while (ii > level) {
7745 for (teams_serialized = team->t.t_serialized;
7746 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
7747 }
7748 if (team->t.t_serialized && (!teams_serialized)) {
7749 team = team->t.t_parent;
7750 continue;
7751 }
7752 if (ii > level) {
7753 team = team->t.t_parent;
7754 ii--;
7755 }
7756 }
7757 return team;
7758 }
7759 return NULL;
7760}
7761
7762int __kmp_aux_get_team_num() {
7763 int serialized;
7764 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
7765 if (team) {
7766 if (serialized > 1) {
7767 return 0; // teams region is serialized (1 team of 1 thread)
7768 } else {
7769 return team->t.t_master_tid;
7770 }
7771 }
7772 return 0;
7773}
7774
7775int __kmp_aux_get_num_teams() {
7776 int serialized;
7777 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
7778 if (team) {
7779 if (serialized > 1) {
7780 return 1;
7781 } else {
7782 return team->t.t_parent->t.t_nproc;
7783 }
7784 }
7785 return 1;
7786}
7787
7788/* ------------------------------------------------------------------------ */
7789
7790#if OMP_50_ENABLED
7791/*
7792 * Affinity Format Parser
7793 *
7794 * Field is in form of: %[[[0].]size]type
7795 * % and type are required (%% means print a literal '%')
7796 * type is either single char or long name surrounded by {},
7797 * e.g., N or {num_threads}
7798 * 0 => leading zeros
7799 * . => right justified when size is specified
7800 * by default output is left justified
7801 * size is the *minimum* field length
7802 * All other characters are printed as is
7803 *
7804 * Available field types:
7805 * t {team_num} - omp_get_team_num()
7806 * T {num_teams} - omp_get_num_teams()
7807 * L {nesting_level} - omp_get_level()
7808 * n {thread_num} - omp_get_thread_num()
7809 * N {num_threads} - omp_get_num_threads()
7810 * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
7811 * H {host} - name of host machine
7812 * P {process_id} - process id (integer); i {native_thread_id} - native thread id
7813 * A {thread_affinity} - comma separated list of integers or integer ranges (values of affinity mask)
7814 *
7815 * Implementation-specific field types can be added
7816 * If a type is unknown, print "undefined"
7817*/
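// Illustrative example (not from the original source; host name and pid are
// made up): with the format string
//   "OMP: host=%H pid=%P thread=%0.4n of %N"
// a thread might produce
//   "OMP: host=node42 pid=12345 thread=0002 of 8"
// since %0.4n is zero-padded and right-justified to width 4, while %N has no
// width modifier and is left-justified by default.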
7818
7819// Structure holding the short name, long name, and corresponding data type
7820// for snprintf. A table of these will represent the entire valid keyword
7821// field types.
7822typedef struct kmp_affinity_format_field_t {
7823 char short_name; // from spec e.g., L -> thread level
7824 const char *long_name; // from spec thread_level -> thread level
7825 char field_format; // data type for snprintf (typically 'd' or 's'
7826 // for integer or string)
7827} kmp_affinity_format_field_t;
7828
7829static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
7830#if KMP_AFFINITY_SUPPORTED
7831 {'A', "thread_affinity", 's'},
7832#endif
7833 {'t', "team_num", 'd'},
7834 {'T', "num_teams", 'd'},
7835 {'L', "nesting_level", 'd'},
7836 {'n', "thread_num", 'd'},
7837 {'N', "num_threads", 'd'},
7838 {'a', "ancestor_tnum", 'd'},
7839 {'H', "host", 's'},
7840 {'P', "process_id", 'd'},
7841 {'i', "native_thread_id", 'd'}};
7842
7843// Return the number of characters it takes to hold field
7844static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
7845 const char **ptr,
7846 kmp_str_buf_t *field_buffer) {
7847 int rc, format_index, field_value;
7848 const char *width_left, *width_right;
7849 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
7850 static const int FORMAT_SIZE = 20;
7851 char format[FORMAT_SIZE] = {0};
7852 char absolute_short_name = 0;
7853
7854 KMP_DEBUG_ASSERT(gtid >= 0);
7855 KMP_DEBUG_ASSERT(th);
7856 KMP_DEBUG_ASSERT(**ptr == '%');
7857 KMP_DEBUG_ASSERT(field_buffer);
7858
7859 __kmp_str_buf_clear(field_buffer);
7860
7861 // Skip the initial %
7862 (*ptr)++;
7863
7864 // Check for %% first
7865 if (**ptr == '%') {
7866 __kmp_str_buf_cat(field_buffer, "%", 1);
7867 (*ptr)++; // skip over the second %
7868 return 1;
7869 }
7870
7871 // Parse field modifiers if they are present
7872 pad_zeros = false;
7873 if (**ptr == '0') {
7874 pad_zeros = true;
7875 (*ptr)++; // skip over 0
7876 }
7877 right_justify = false;
7878 if (**ptr == '.') {
7879 right_justify = true;
7880 (*ptr)++; // skip over .
7881 }
7882 // Parse width of field: [width_left, width_right)
7883 width_left = width_right = NULL;
7884 if (**ptr >= '0' && **ptr <= '9') {
7885 width_left = *ptr;
7886 SKIP_DIGITS(*ptr);
7887 width_right = *ptr;
7888 }
7889
7890 // Create the format for KMP_SNPRINTF based on flags parsed above
7891 format_index = 0;
7892 format[format_index++] = '%';
7893 if (!right_justify)
7894 format[format_index++] = '-';
7895 if (pad_zeros)
7896 format[format_index++] = '0';
7897 if (width_left && width_right) {
7898 int i = 0;
7899 // Only allow 8 digit number widths.
7900 // This also prevents overflowing format variable
7901 while (i < 8 && width_left < width_right) {
7902 format[format_index++] = *width_left;
7903 width_left++;
7904 i++;
7905 }
7906 }
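 // For example (illustrative): the field spec "%0.8L" builds the snprintf
 // format "%08d" (zero-padded, right-justified, minimum width 8), while
 // "%5n" builds "%-5d" (left-justified, minimum width 5).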
7907
7908 // Parse a name (long or short)
7909 // Canonicalize the name into absolute_short_name
7910 found_valid_name = false;
7911 parse_long_name = (**ptr == '{');
7912 if (parse_long_name)
7913 (*ptr)++; // skip initial left brace
7914 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
7915 sizeof(__kmp_affinity_format_table[0]);
7916 ++i) {
7917 char short_name = __kmp_affinity_format_table[i].short_name;
7918 const char *long_name = __kmp_affinity_format_table[i].long_name;
7919 char field_format = __kmp_affinity_format_table[i].field_format;
7920 if (parse_long_name) {
7921 int length = KMP_STRLEN(long_name);
7922 if (strncmp(*ptr, long_name, length) == 0) {
7923 found_valid_name = true;
7924 (*ptr) += length; // skip the long name
7925 }
7926 } else if (**ptr == short_name) {
7927 found_valid_name = true;
7928 (*ptr)++; // skip the short name
7929 }
7930 if (found_valid_name) {
7931 format[format_index++] = field_format;
7932 format[format_index++] = '\0';
7933 absolute_short_name = short_name;
7934 break;
7935 }
7936 }
7937 if (parse_long_name) {
7938 if (**ptr != '}') {
7939 absolute_short_name = 0;
7940 } else {
7941 (*ptr)++; // skip over the right brace
7942 }
7943 }
7944
7945 // Attempt to fill the buffer with the requested
7946 // value using snprintf within __kmp_str_buf_print()
7947 switch (absolute_short_name) {
7948 case 't':
7949 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
7950 break;
7951 case 'T':
7952 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
7953 break;
7954 case 'L':
7955 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
7956 break;
7957 case 'n':
7958 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
7959 break;
7960 case 'H': {
7961 static const int BUFFER_SIZE = 256;
7962 char buf[BUFFER_SIZE];
7963 __kmp_expand_host_name(buf, BUFFER_SIZE);
7964 rc = __kmp_str_buf_print(field_buffer, format, buf);
7965 } break;
7966 case 'P':
7967 rc = __kmp_str_buf_print(field_buffer, format, getpid());
7968 break;
7969 case 'i':
7970 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
7971 break;
7972 case 'N':
7973 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
7974 break;
7975 case 'a':
7976 field_value =
7977 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
7978 rc = __kmp_str_buf_print(field_buffer, format, field_value);
7979 break;
7980#if KMP_AFFINITY_SUPPORTED
7981 case 'A': {
7982 kmp_str_buf_t buf;
7983 __kmp_str_buf_init(&buf);
7984 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
7985 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
7986 __kmp_str_buf_free(&buf);
7987 } break;
7988#endif
7989 default:
7990 // According to the spec, if an implementation does not have info for a
7991 // field type, then "undefined" is printed
7992 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
7993 // Skip the field
7994 if (parse_long_name) {
7995 SKIP_TOKEN(*ptr);
7996 if (**ptr == '}')
7997 (*ptr)++;
7998 } else {
7999 (*ptr)++;
8000 }
8001 }
8002
8003 KMP_ASSERT(format_index <= FORMAT_SIZE);
8004 return rc;
8005}
8006
8007/*
8008 * Return the number of characters needed to hold the affinity string
8009 * (not including the terminating null byte).
8010 * The resulting string is printed to buffer, which the caller can then
8011 * handle afterwards.
8012 */
8013size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8014 kmp_str_buf_t *buffer) {
8015 const char *parse_ptr;
8016 size_t retval;
8017 const kmp_info_t *th;
8018 kmp_str_buf_t field;
8019
8020 KMP_DEBUG_ASSERT(buffer);
8021 KMP_DEBUG_ASSERT(gtid >= 0);
8022
8023 __kmp_str_buf_init(&field);
8024 __kmp_str_buf_clear(buffer);
8025
8026 th = __kmp_threads[gtid];
8027 retval = 0;
8028
8029 // If format is NULL or a zero-length string, then we use the
8030 // affinity-format-var ICV
8031 parse_ptr = format;
8032 if (parse_ptr == NULL || *parse_ptr == '\0') {
8033 parse_ptr = __kmp_affinity_format;
8034 }
8035 KMP_DEBUG_ASSERT(parse_ptr);
8036
8037 while (*parse_ptr != '\0') {
8038 // Parse a field
8039 if (*parse_ptr == '%') {
8040 // Put field in the buffer
8041 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8042 __kmp_str_buf_catbuf(buffer, &field);
8043 retval += rc;
8044 } else {
8045 // Put literal character in buffer
8046 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8047 retval++;
8048 parse_ptr++;
8049 }
8050 }
8051 __kmp_str_buf_free(&field);
8052 return retval;
8053}
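// Illustrative usage sketch (gtid and the format string here are assumptions,
// mirroring __kmp_aux_display_affinity below):
//   kmp_str_buf_t buf;
//   __kmp_str_buf_init(&buf);
//   size_t n = __kmp_aux_capture_affinity(gtid, "pid %P tid %n/%N", &buf);
//   // buf.str now holds the expanded text; n characters, excluding the null
//   __kmp_str_buf_free(&buf);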
8054
8055// Displays the affinity string to stdout
8056void __kmp_aux_display_affinity(int gtid, const char *format) {
8057 kmp_str_buf_t buf;
8058 __kmp_str_buf_init(&buf);
8059 __kmp_aux_capture_affinity(gtid, format, &buf);
8060 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8061 __kmp_str_buf_free(&buf);
8062}
8063#endif // OMP_50_ENABLED
8064
Jim Cownie5e8470a2013-09-27 10:38:44 +00008065/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00008066
Jonathan Peyton30419822017-05-12 18:01:32 +00008067void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8068 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008069#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00008070 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008071#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00008072 int bt_set;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008073
Jonathan Peyton30419822017-05-12 18:01:32 +00008074 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008075
Jonathan Peyton30419822017-05-12 18:01:32 +00008076 /* Normalize and set blocktime for the teams */
8077 if (blocktime < KMP_MIN_BLOCKTIME)
8078 blocktime = KMP_MIN_BLOCKTIME;
8079 else if (blocktime > KMP_MAX_BLOCKTIME)
8080 blocktime = KMP_MAX_BLOCKTIME;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008081
Jonathan Peyton30419822017-05-12 18:01:32 +00008082 set__blocktime_team(thread->th.th_team, tid, blocktime);
8083 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008084
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008085#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00008086 /* Calculate and set blocktime intervals for the teams */
8087 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008088
Jonathan Peyton30419822017-05-12 18:01:32 +00008089 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8090 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008091#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00008092
Jonathan Peyton30419822017-05-12 18:01:32 +00008093 /* Record that blocktime has been explicitly set ("bt_set" = TRUE) */
8094 bt_set = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008095
Jonathan Peyton30419822017-05-12 18:01:32 +00008096 set__bt_set_team(thread->th.th_team, tid, bt_set);
8097 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008098#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00008099 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8100 "bt_intervals=%d, monitor_updates=%d\n",
8101 __kmp_gtid_from_tid(tid, thread->th.th_team),
8102 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8103 __kmp_monitor_wakeups));
Samuel Antao33515192016-10-20 13:20:17 +00008104#else
Jonathan Peyton30419822017-05-12 18:01:32 +00008105 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8106 __kmp_gtid_from_tid(tid, thread->th.th_team),
8107 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00008108#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00008109}
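// Summary of the routine above: the argument is taken in milliseconds and
// clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME]; the result is applied to
// both the calling thread's current team and its serial team, and bt_set
// records that the value was set explicitly.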
8110
Jonathan Peyton30419822017-05-12 18:01:32 +00008111void __kmp_aux_set_defaults(char const *str, int len) {
8112 if (!__kmp_init_serial) {
8113 __kmp_serial_initialize();
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00008114 }
Jonathan Peyton30419822017-05-12 18:01:32 +00008115 __kmp_env_initialize(str);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008116
Jonathan Peyton30419822017-05-12 18:01:32 +00008117 if (__kmp_settings
Jim Cownie5e8470a2013-09-27 10:38:44 +00008118#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00008119 || __kmp_display_env || __kmp_display_env_verbose
Jim Cownie5e8470a2013-09-27 10:38:44 +00008120#endif // OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00008121 ) {
8122 __kmp_env_print();
8123 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00008124} // __kmp_aux_set_defaults
8125
8126/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00008127/* internal fast reduction routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00008128
Jim Cownie5e8470a2013-09-27 10:38:44 +00008129PACKED_REDUCTION_METHOD_T
Jonathan Peyton30419822017-05-12 18:01:32 +00008130__kmp_determine_reduction_method(
8131 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8132 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8133 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00008134
Jonathan Peyton30419822017-05-12 18:01:32 +00008135 // Default reduction method: critical construct ( lck != NULL, like in current
8136 // PAROPT )
8137 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
8138 // can be selected by RTL
8139 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
8140 // can be selected by RTL
8141 // Finally, it is up to the OpenMP RTL to decide which method to select
8142 // among those generated by PAROPT.
Jim Cownie5e8470a2013-09-27 10:38:44 +00008143
Jonathan Peyton30419822017-05-12 18:01:32 +00008144 PACKED_REDUCTION_METHOD_T retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008145
Jonathan Peyton30419822017-05-12 18:01:32 +00008146 int team_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008147
Jonathan Peyton30419822017-05-12 18:01:32 +00008148 KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
8149 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
Jim Cownie5e8470a2013-09-27 10:38:44 +00008150
Jonathan Peyton30419822017-05-12 18:01:32 +00008151#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8152 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
8153#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
Jim Cownie5e8470a2013-09-27 10:38:44 +00008154
Jonathan Peyton30419822017-05-12 18:01:32 +00008155 retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008156
Jonathan Peyton30419822017-05-12 18:01:32 +00008157 // another way of getting the team size (with 1 dynamic dereference) is slower
8158 team_size = __kmp_get_team_num_threads(global_tid);
8159 if (team_size == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00008160
Jonathan Peyton30419822017-05-12 18:01:32 +00008161 retval = empty_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008162
Jonathan Peyton30419822017-05-12 18:01:32 +00008163 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00008164
Jonathan Peyton30419822017-05-12 18:01:32 +00008165 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008166
Jonathan Peyton30419822017-05-12 18:01:32 +00008167#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00008168
Kamil Rytarowskia56ac942018-12-09 16:40:33 +00008169#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
Kamil Rytarowski7e1ea992018-12-09 16:46:48 +00008170 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
Jim Cownie5e8470a2013-09-27 10:38:44 +00008171
Jonathan Peyton30419822017-05-12 18:01:32 +00008172 int teamsize_cutoff = 4;
Jonathan Peyton91b78702015-06-08 19:39:07 +00008173
Jonathan Peyton492e0a32017-06-13 17:17:26 +00008174#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00008175 if (__kmp_mic_type != non_mic) {
8176 teamsize_cutoff = 8;
8177 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00008178#endif
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00008179 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton30419822017-05-12 18:01:32 +00008180 if (tree_available) {
8181 if (team_size <= teamsize_cutoff) {
8182 if (atomic_available) {
8183 retval = atomic_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00008184 }
Jonathan Peyton30419822017-05-12 18:01:32 +00008185 } else {
8186 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8187 }
8188 } else if (atomic_available) {
8189 retval = atomic_reduce_block;
8190 }
8191#else
8192#error "Unknown or unsupported OS"
Jonathan Peyton17e53b92018-12-10 18:26:50 +00008193#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
8194 // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
Jim Cownie5e8470a2013-09-27 10:38:44 +00008195
Jonathan Peyton30419822017-05-12 18:01:32 +00008196#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
8197
Andrey Churbanov855d0982018-11-07 12:27:38 +00008198#if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD
Jonathan Peyton30419822017-05-12 18:01:32 +00008199
8200 // basic tuning
8201
8202 if (atomic_available) {
8203 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
8204 retval = atomic_reduce_block;
8205 }
8206 } // otherwise: use critical section
8207
8208#elif KMP_OS_DARWIN
8209
Jonathan Peytonbaad3f62018-08-09 22:04:30 +00008210 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jonathan Peyton30419822017-05-12 18:01:32 +00008211 if (atomic_available && (num_vars <= 3)) {
8212 retval = atomic_reduce_block;
8213 } else if (tree_available) {
8214 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8215 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8216 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8217 }
8218 } // otherwise: use critical section
8219
8220#else
8221#error "Unknown or unsupported OS"
8222#endif
8223
8224#else
8225#error "Unknown or unsupported architecture"
8226#endif
8227 }
8228
8229 // KMP_FORCE_REDUCTION
8230
8231 // If the team is serialized (team_size == 1), ignore the forced reduction
8232 // method and stay with the unsynchronized method (empty_reduce_block)
8233 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8234 team_size != 1) {
8235
8236 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8237
8238 int atomic_available, tree_available;
8239
8240 switch ((forced_retval = __kmp_force_reduction_method)) {
8241 case critical_reduce_block:
8242 KMP_ASSERT(lck); // lck should be != 0
8243 break;
8244
8245 case atomic_reduce_block:
8246 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8247 if (!atomic_available) {
8248 KMP_WARNING(RedMethodNotSupported, "atomic");
8249 forced_retval = critical_reduce_block;
8250 }
8251 break;
8252
8253 case tree_reduce_block:
8254 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8255 if (!tree_available) {
8256 KMP_WARNING(RedMethodNotSupported, "tree");
8257 forced_retval = critical_reduce_block;
8258 } else {
8259#if KMP_FAST_REDUCTION_BARRIER
8260 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8261#endif
8262 }
8263 break;
8264
8265 default:
8266 KMP_ASSERT(0); // "unsupported method specified"
Jim Cownie5e8470a2013-09-27 10:38:44 +00008267 }
8268
Jonathan Peyton30419822017-05-12 18:01:32 +00008269 retval = forced_retval;
8270 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00008271
Jonathan Peyton30419822017-05-12 18:01:32 +00008272 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00008273
Jonathan Peyton30419822017-05-12 18:01:32 +00008274#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8275#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8276
8277 return (retval);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008278}
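// Worked example (illustrative, following the selection logic above): with no
// KMP_FORCE_REDUCTION override on x86_64 Linux, a team of 1 always gets
// empty_reduce_block; a team of at most teamsize_cutoff threads (4, or 8 on
// MIC) takes atomic_reduce_block when the atomic method is available; a larger
// team takes TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER when the tree method was
// generated, the atomic method when only that one is available, and otherwise
// stays with the critical-section default.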
8279
8280// This function is for testing the set/get/determine reduce method.
Jonathan Peyton30419822017-05-12 18:01:32 +00008281kmp_int32 __kmp_get_reduce_method(void) {
8282 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
Jim Cownie5e8470a2013-09-27 10:38:44 +00008283}
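// (Encoding note, an assumption based on how PACKED_REDUCTION_METHOD_T is
// packed: the reduction method occupies the upper bits and the barrier kind
// used by the tree variants the low byte, so the >> 8 above strips the
// barrier part.)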
Jonathan Peyton9b8bb322019-01-16 20:07:39 +00008284
8285#if OMP_50_ENABLED
8286
8287// Soft pause sets up threads to ignore blocktime and just go to sleep.
8288// Spin-wait code checks __kmp_pause_status and reacts accordingly.
8289void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8290
8291// Hard pause shuts down the runtime completely. Resume happens naturally when
8292// OpenMP is used subsequently.
8293void __kmp_hard_pause() {
8294 __kmp_pause_status = kmp_hard_paused;
8295 __kmp_internal_end_thread(-1);
8296}
8297
8298// Soft resume resets __kmp_pause_status to kmp_not_paused and wakes up all threads.
8299void __kmp_resume_if_soft_paused() {
8300 if (__kmp_pause_status == kmp_soft_paused) {
8301 __kmp_pause_status = kmp_not_paused;
8302
8303 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8304 kmp_info_t *thread = __kmp_threads[gtid];
8305 if (thread) { // Wake it if sleeping
8306 kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
8307 if (fl.is_sleeping())
8308 fl.resume(gtid);
8309 else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
8310 __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
8311 } else { // thread holds the lock and may sleep soon
8312 do { // until either the thread sleeps, or we can get the lock
8313 if (fl.is_sleeping()) {
8314 fl.resume(gtid);
8315 break;
8316 } else if (__kmp_try_suspend_mx(thread)) {
8317 __kmp_unlock_suspend_mx(thread);
8318 break;
8319 }
8320 } while (1);
8321 }
8322 }
8323 }
8324 }
8325}
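// (The retry loop above closes the race in which a worker already holds its
// own suspend mutex but has not yet gone to sleep: we keep checking until we
// either observe it sleeping and resume it, or acquire the mutex ourselves,
// in which case, as noted above, it will not sleep.)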
8326
8327// This function is called via __kmpc_pause_resource. Returns 0 if successful.
8328// TODO: add warning messages
8329int __kmp_pause_resource(kmp_pause_status_t level) {
8330 if (level == kmp_not_paused) { // requesting resume
8331 if (__kmp_pause_status == kmp_not_paused) {
8332 // error message about runtime not being paused, so can't resume
8333 return 1;
8334 } else {
8335 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
8336 __kmp_pause_status == kmp_hard_paused);
8337 __kmp_pause_status = kmp_not_paused;
8338 return 0;
8339 }
8340 } else if (level == kmp_soft_paused) { // requesting soft pause
8341 if (__kmp_pause_status != kmp_not_paused) {
8342 // error message about already being paused
8343 return 1;
8344 } else {
8345 __kmp_soft_pause();
8346 return 0;
8347 }
8348 } else if (level == kmp_hard_paused) { // requesting hard pause
8349 if (__kmp_pause_status != kmp_not_paused) {
8350 // error message about already being paused
8351 return 1;
8352 } else {
8353 __kmp_hard_pause();
8354 return 0;
8355 }
8356 } else {
8357 // error message about invalid level
8358 return 1;
8359 }
8360}
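// Summary of the checks above (sketch):
//   requested level   valid current state                 action / return
//   kmp_not_paused    kmp_soft_paused or kmp_hard_paused  clear pause, 0
//   kmp_soft_paused   kmp_not_paused                      __kmp_soft_pause(), 0
//   kmp_hard_paused   kmp_not_paused                      __kmp_hard_pause(), 0
//   any other combination                                 1 (error)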
8361
8362#endif // OMP_50_ENABLED