/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
    "5.0 (201611)";
#elif OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

#if KMP_USE_MONITOR
kmp_info_t __kmp_monitor;
#endif

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
     non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
     may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

  /* if we haven't been assigned a gtid, then return the error code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
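
/* Illustrative sketch (not part of the runtime): the loop above is a
   stack-membership test. With downward-growing stacks, the caller is running
   on candidate thread i's stack iff, informally,

     char *addr = (char *)&some_local;            // an address on our stack
     bool on_stack_i = (addr <= base_i) && ((size_t)(base_i - addr) <= size_i);

   where base_i and size_i stand for the ds_stackbase and ds_stacksize values
   read from __kmp_threads[i] (the names here are hypothetical). */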

int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
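
/* Informal summary of the lookup order used above, from cheapest to most
   expensive, controlled by __kmp_gtid_mode:
     mode >= 3 : read the __kmp_gtid thread-local variable (KMP_TDATA_GTID);
     mode >= 2 : query the OS TLS key via __kmp_gtid_get_specific();
     otherwise : scan thread stacks in __kmp_get_global_thread_id().
   Only when all of these report KMP_GTID_DNE does the caller run serial
   initialization or register a new root. */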

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock(" GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS, by default abort() causes a pop-up error box, which
       stalls nightly testing. Unfortunately, we cannot reliably suppress
       pop-up error boxes. _set_abort_behavior() works well, but this function
       is not available in VS7 (this is not a problem for the DLL, but it is a
       problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit
       utility) does not help, at least in some versions of MS C RTL.

       It seems the following sequence is the only way to simulate abort() and
       avoid the pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). The OS terminates the other threads
  // (except the one calling ProcessExit or FreeLibrary), so it might seem safe
  // to access __kmp_threads[] without taking the forkjoin_lock. In practice,
  // however, some threads may still be alive here, even though they are about
  // to be terminated; entries with ds_thread==0 are the most suspicious. So it
  // may not actually be safe to access __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      // lpReserved == NULL when FreeLibrary() was called,
      // lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      // - the main function was compiled by an alternative compiler;
      // - the main function was compiled by icl but without /Qopenmp
      //   (application with plugins);
      // - application terminates by calling C exit(), Fortran CALL EXIT() or
      //   Fortran STOP.
      // - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
                     // KMP_LIBRARY=throughput
}
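
/* Informal note: __kmp_yield_init carries the library mode in its low bit, as
   the comments above say: an even value corresponds to KMP_LIBRARY=throughput
   and an odd value to turnaround. For example:

     __kmp_yield_init = 8;   // throughput: (8 & 1) == 0
     __kmp_yield_init |= 1;  // turnaround: value 9, (9 & 1) == 1

   so __kmp_change_library() only flips bit 0 and returns the previous parity. */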

/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}
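
/* Informal sketch: claiming the SINGLE block above is a compare-and-swap race
   on the team-wide counter t_construct. Each thread tracks how many constructs
   it has seen in th_local.this_construct, and only the thread whose CAS
   succeeds executes the block:

     old = this_construct;                       // value before this construct
     won = atomic_compare_store(&t_construct, old, old + 1);
     if (won) { ... execute the single region ... }

   (__kmp_atomic_compare_store_acq plays the role of atomic_compare_store here,
   and the incremented this_construct is the "old + 1"). */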

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}
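
/* Worked example (informal) for the KMP_DEVICE_THREAD_LIMIT clamp above:
   suppose __kmp_max_nth == 8, __kmp_nth == 6, the root is not active and its
   hot team currently has 4 threads, and set_nthreads == 8. Then
     6 + 8 - 4 = 10 > 8, so the request is capped to
     tl_nthreads = 8 - 6 + 4 = 6
   and, unless dyn-var is set, a one-time CantFormThrTeam warning is issued.
   The OMP_THREAD_LIMIT and capacity checks apply the same pattern to
   r_cg_nthreads and __kmp_threads_capacity. */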

/* Allocate threads from the thread pool and assign them to the new team. We
   are assured that there are enough threads available, because we checked on
   that earlier within the forkjoin critical section. */
991static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
992 kmp_info_t *master_th, int master_gtid) {
993 int i;
994 int use_hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000995
Jonathan Peyton30419822017-05-12 18:01:32 +0000996 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
997 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
998 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000999
Jonathan Peyton30419822017-05-12 18:01:32 +00001000 /* first, let's setup the master thread */
1001 master_th->th.th_info.ds.ds_tid = 0;
1002 master_th->th.th_team = team;
1003 master_th->th.th_team_nproc = team->t.t_nproc;
1004 master_th->th.th_team_master = master_th;
1005 master_th->th.th_team_serialized = FALSE;
1006 master_th->th.th_dispatch = &team->t.t_dispatch[0];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001007
Jonathan Peyton30419822017-05-12 18:01:32 +00001008/* make sure we are not the optimized hot team */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001009#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001010 use_hot_team = 0;
1011 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
1012 if (hot_teams) { // hot teams array is not allocated if
1013 // KMP_HOT_TEAMS_MAX_LEVEL=0
1014 int level = team->t.t_active_level - 1; // index in array of hot teams
1015 if (master_th->th.th_teams_microtask) { // are we inside the teams?
1016 if (master_th->th.th_teams_size.nteams > 1) {
1017 ++level; // level was not increased in teams construct for
1018 // team_of_masters
1019 }
1020 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1021 master_th->th.th_teams_level == team->t.t_level) {
1022 ++level; // level was not increased in teams construct for
1023 // team_of_workers before the parallel
1024 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001025 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001026 if (level < __kmp_hot_teams_max_level) {
1027 if (hot_teams[level].hot_team) {
1028 // hot team has already been allocated for given level
1029 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
1030 use_hot_team = 1; // the team is ready to use
1031 } else {
1032 use_hot_team = 0; // AC: threads are not allocated yet
1033 hot_teams[level].hot_team = team; // remember new hot team
1034 hot_teams[level].hot_team_nth = team->t.t_nproc;
1035 }
1036 } else {
1037 use_hot_team = 0;
1038 }
1039 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001040#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001041 use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001042#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001043 if (!use_hot_team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001044
Jonathan Peyton30419822017-05-12 18:01:32 +00001045 /* install the master thread */
1046 team->t.t_threads[0] = master_th;
1047 __kmp_initialize_info(master_th, team, 0, master_gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001048
Jonathan Peyton30419822017-05-12 18:01:32 +00001049 /* now, install the worker threads */
1050 for (i = 1; i < team->t.t_nproc; i++) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001051
Jonathan Peyton30419822017-05-12 18:01:32 +00001052 /* fork or reallocate a new thread and install it in team */
1053 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
1054 team->t.t_threads[i] = thr;
1055 KMP_DEBUG_ASSERT(thr);
1056 KMP_DEBUG_ASSERT(thr->th.th_team == team);
1057 /* align team and thread arrived states */
1058 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
1059 "T#%d(%d:%d) join =%llu, plain=%llu\n",
1060 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
1061 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
1062 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1063 team->t.t_bar[bs_plain_barrier].b_arrived));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001064#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001065 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1066 thr->th.th_teams_level = master_th->th.th_teams_level;
1067 thr->th.th_teams_size = master_th->th.th_teams_size;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001068#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001069 { // Initialize threads' barrier data.
1070 int b;
1071 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1072 for (b = 0; b < bs_last_barrier; ++b) {
1073 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1074 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001075#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001076 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001077#endif
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001078 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001079 }
1080 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001081
Alp Toker98758b02014-03-02 04:12:06 +00001082#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00001083 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001084#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001085 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001086
Jonathan Peyton30419822017-05-12 18:01:32 +00001087 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001088}
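
/* Informal note: the use_hot_team short-circuit above is the common fast path.
   For a non-nested region the root's hot team (or, with KMP_NESTED_HOT_TEAMS,
   the per-level entry in th_hot_teams) already holds live workers, so the
   per-thread install loop and place partitioning above are skipped entirely. */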

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // during the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
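
/* Informal sketch: KMP_CHECK_UPDATE as used above is a write-avoidance idiom
   along the lines of

     if (team->t.t_mxcsr != mxcsr)
       team->t.t_mxcsr = mxcsr;

   i.e. the shared cache line is only dirtied when the value really changed,
   so the worker threads do not all have to re-read an unchanged line. */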
1151
Jonathan Peyton30419822017-05-12 18:01:32 +00001152static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
1153 int realloc); // forward declaration
Jim Cownie5e8470a2013-09-27 10:38:44 +00001154
Jonathan Peyton30419822017-05-12 18:01:32 +00001155/* Run a parallel region that has been serialized, so runs only in a team of the
1156 single master thread. */
1157void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1158 kmp_info_t *this_thr;
1159 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001160
Jonathan Peyton30419822017-05-12 18:01:32 +00001161 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001162
Jonathan Peyton30419822017-05-12 18:01:32 +00001163 /* Skip all this code for autopar serialized loops since it results in
1164 unacceptable overhead */
1165 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1166 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001167
Jonathan Peyton30419822017-05-12 18:01:32 +00001168 if (!TCR_4(__kmp_init_parallel))
1169 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001170
Jonathan Peyton30419822017-05-12 18:01:32 +00001171 this_thr = __kmp_threads[global_tid];
1172 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001173
Jonathan Peyton30419822017-05-12 18:01:32 +00001174 /* utilize the serialized team held by this thread */
1175 KMP_DEBUG_ASSERT(serial_team);
1176 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001177
Jonathan Peyton30419822017-05-12 18:01:32 +00001178 if (__kmp_tasking_mode != tskm_immediate_exec) {
1179 KMP_DEBUG_ASSERT(
1180 this_thr->th.th_task_team ==
1181 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1182 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1183 NULL);
1184 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1185 "team %p, new task_team = NULL\n",
1186 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1187 this_thr->th.th_task_team = NULL;
1188 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001189
1190#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001191 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1192 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1193 proc_bind = proc_bind_false;
1194 } else if (proc_bind == proc_bind_default) {
1195 // No proc_bind clause was specified, so use the current value
1196 // of proc-bind-var for this parallel region.
1197 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1198 }
1199 // Reset for next parallel region
1200 this_thr->th.th_set_proc_bind = proc_bind_default;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001201#endif /* OMP_40_ENABLED */
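  // Descriptive example of the resolution above: a clause such as
  // "#pragma omp parallel proc_bind(spread)" arrives in th_set_proc_bind;
  // with no clause the value stays proc_bind_default and falls back to the
  // current proc-bind-var ICV (e.g. set via OMP_PROC_BIND).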
1202
Joachim Protze82e94a52017-11-01 10:08:30 +00001203#if OMPT_SUPPORT
1204 ompt_data_t ompt_parallel_data;
1205 ompt_parallel_data.ptr = NULL;
1206 ompt_data_t *implicit_task_data;
1207 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1208 if (ompt_enabled.enabled &&
1209 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
1210
1211 ompt_task_info_t *parent_task_info;
1212 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1213
Joachim Protzec255ca72017-11-05 14:11:10 +00001214 parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001215 if (ompt_enabled.ompt_callback_parallel_begin) {
1216 int team_size = 1;
1217
1218 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1219 &(parent_task_info->task_data), &(parent_task_info->frame),
1220 &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
1221 }
1222 }
1223#endif // OMPT_SUPPORT
1224
Jonathan Peyton30419822017-05-12 18:01:32 +00001225 if (this_thr->th.th_team != serial_team) {
1226 // Nested level will be an index in the nested nthreads array
1227 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001228
Jonathan Peyton30419822017-05-12 18:01:32 +00001229 if (serial_team->t.t_serialized) {
1230 /* this serial team was already used
1231 TODO increase performance by making these locks more specific */
1232 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001233
Jonathan Peyton30419822017-05-12 18:01:32 +00001234 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001235
Jonathan Peyton30419822017-05-12 18:01:32 +00001236 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001237#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001238 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001239#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001240#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001241 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001242#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001243 &this_thr->th.th_current_task->td_icvs,
1244 0 USE_NESTED_HOT_ARG(NULL));
1245 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1246 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001247
Jonathan Peyton30419822017-05-12 18:01:32 +00001248 /* setup new serialized team and install it */
1249 new_team->t.t_threads[0] = this_thr;
1250 new_team->t.t_parent = this_thr->th.th_team;
1251 serial_team = new_team;
1252 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001253
Jonathan Peyton30419822017-05-12 18:01:32 +00001254 KF_TRACE(
1255 10,
1256 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1257 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001258
Jonathan Peyton30419822017-05-12 18:01:32 +00001259 /* TODO the above breaks the requirement that if we run out of resources,
1260 then we can still guarantee that serialized teams are ok, since we may
1261 need to allocate a new one */
1262 } else {
1263 KF_TRACE(
1264 10,
1265 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1266 global_tid, serial_team));
1267 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001268
Jonathan Peyton30419822017-05-12 18:01:32 +00001269 /* we have to initialize this serial team */
1270 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1271 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1272 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1273 serial_team->t.t_ident = loc;
1274 serial_team->t.t_serialized = 1;
1275 serial_team->t.t_nproc = 1;
1276 serial_team->t.t_parent = this_thr->th.th_team;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00001277 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00001278 this_thr->th.th_team = serial_team;
1279 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001280
Jonathan Peyton30419822017-05-12 18:01:32 +00001281 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1282 this_thr->th.th_current_task));
1283 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1284 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001285
Jonathan Peyton30419822017-05-12 18:01:32 +00001286 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001287
Jonathan Peyton30419822017-05-12 18:01:32 +00001288 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1289 implicit task for each serialized task represented by
1290 team->t.t_serialized? */
1291 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1292 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001293
Jonathan Peyton30419822017-05-12 18:01:32 +00001294 // Thread value exists in the nested nthreads array for the next nested
1295 // level
1296 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1297 this_thr->th.th_current_task->td_icvs.nproc =
1298 __kmp_nested_nth.nth[level + 1];
1299 }
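    // (Illustrative: with OMP_NUM_THREADS=4,2 the __kmp_nested_nth array
    // would hold {4, 2}, so a region at nesting level 0 sets nproc for the
    // next level to 2.)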
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001300
1301#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001302 if (__kmp_nested_proc_bind.used &&
1303 (level + 1 < __kmp_nested_proc_bind.used)) {
1304 this_thr->th.th_current_task->td_icvs.proc_bind =
1305 __kmp_nested_proc_bind.bind_types[level + 1];
1306 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001307#endif /* OMP_40_ENABLED */
1308
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001309#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001310 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001311#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001312 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001313
Jonathan Peyton30419822017-05-12 18:01:32 +00001314 /* set thread cache values */
1315 this_thr->th.th_team_nproc = 1;
1316 this_thr->th.th_team_master = this_thr;
1317 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001318
Jonathan Peyton30419822017-05-12 18:01:32 +00001319 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1320 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001321
Jonathan Peyton30419822017-05-12 18:01:32 +00001322 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001323
Jonathan Peyton30419822017-05-12 18:01:32 +00001324 /* check if we need to allocate dispatch buffers stack */
1325 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1326 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1327 serial_team->t.t_dispatch->th_disp_buffer =
1328 (dispatch_private_info_t *)__kmp_allocate(
1329 sizeof(dispatch_private_info_t));
1330 }
1331 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001332
Jonathan Peyton30419822017-05-12 18:01:32 +00001333 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001334
Jonathan Peyton30419822017-05-12 18:01:32 +00001335 } else {
1336 /* this serialized team is already being used,
1337 * that's fine, just add another nested level */
1338 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1339 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1340 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1341 ++serial_team->t.t_serialized;
1342 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001343
Jonathan Peyton30419822017-05-12 18:01:32 +00001344 // Nested level will be an index in the nested nthreads array
1345 int level = this_thr->th.th_team->t.t_level;
1346 // Thread value exists in the nested nthreads array for the next nested
1347 // level
1348 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1349 this_thr->th.th_current_task->td_icvs.nproc =
1350 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001351 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001352 serial_team->t.t_level++;
1353 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1354 "of serial team %p to %d\n",
1355 global_tid, serial_team, serial_team->t.t_level));
1356
1357 /* allocate/push dispatch buffers stack */
1358 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1359 {
1360 dispatch_private_info_t *disp_buffer =
1361 (dispatch_private_info_t *)__kmp_allocate(
1362 sizeof(dispatch_private_info_t));
1363 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1364 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1365 }
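    // (Each nesting level of the serialized region pushes one
    // dispatch_private_info_t onto this singly-linked stack; the matching
    // end-of-serialized-parallel code is expected to pop and free it.)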
1366 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1367
1368 KMP_MB();
1369 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001370#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001371 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001372#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001373
Jonathan Peyton30419822017-05-12 18:01:32 +00001374 if (__kmp_env_consistency_check)
1375 __kmp_push_parallel(global_tid, NULL);
Joachim Protze82e94a52017-11-01 10:08:30 +00001376#if OMPT_SUPPORT
1377 serial_team->t.ompt_team_info.master_return_address = codeptr;
1378 if (ompt_enabled.enabled &&
1379 this_thr->th.ompt_thread_info.state != omp_state_overhead) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001380 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001381
1382 ompt_lw_taskteam_t lw_taskteam;
1383 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1384 &ompt_parallel_data, codeptr);
1385
1386 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1387 // don't use lw_taskteam after linking. content was swapped
1388
1389 /* OMPT implicit task begin */
1390 implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
1391 if (ompt_enabled.ompt_callback_implicit_task) {
1392 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1393 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1394 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001395 OMPT_CUR_TASK_INFO(this_thr)
1396 ->thread_num = __kmp_tid_from_gtid(global_tid);
Joachim Protze82e94a52017-11-01 10:08:30 +00001397 }
1398
1399 /* OMPT state */
1400 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
Joachim Protzec255ca72017-11-05 14:11:10 +00001401 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
Joachim Protze82e94a52017-11-01 10:08:30 +00001402 }
1403#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001404}
Jim Cownie181b4bb2013-12-23 17:28:57 +00001405
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406/* most of the work for a fork */
1407/* return true if we really went parallel, false if serialized */
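/* (Descriptive note: the compiler entry point __kmpc_fork_call passes
   call_context == fork_context_intel, while the GOMP compatibility layer
   passes fork_context_gnu; see kmp_csupport.cpp and kmp_gsupport.cpp.) */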
Jonathan Peyton30419822017-05-12 18:01:32 +00001408int __kmp_fork_call(ident_t *loc, int gtid,
1409 enum fork_context_e call_context, // Intel, GNU, ...
Joachim Protze82e94a52017-11-01 10:08:30 +00001410 kmp_int32 argc, microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001411/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001412#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001413 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001414#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001415 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001416#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001417 ) {
1418 void **argv;
1419 int i;
1420 int master_tid;
1421 int master_this_cons;
1422 kmp_team_t *team;
1423 kmp_team_t *parent_team;
1424 kmp_info_t *master_th;
1425 kmp_root_t *root;
1426 int nthreads;
1427 int master_active;
1428 int master_set_numthreads;
1429 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001430#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001431 int active_level;
1432 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001433#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001434#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001435 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001436#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001437 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001438 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001439 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001440
Jonathan Peyton30419822017-05-12 18:01:32 +00001441 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1442 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1443 /* Some systems prefer the stack for the root thread(s) to start with */
1444 /* some gap from the parent stack to prevent false sharing. */
1445 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1446 /* These 2 lines below are so this does not get optimized out */
1447 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1448 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001449 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001450
1451 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001452 KMP_DEBUG_ASSERT(
1453 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1454 if (!TCR_4(__kmp_init_parallel))
1455 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001456
1457 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001458 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1459 // shutdown
1460 parent_team = master_th->th.th_team;
1461 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001462 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001463 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001464 master_active = root->r.r_active;
1465 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001466
1467#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001468 ompt_data_t ompt_parallel_data;
1469 ompt_parallel_data.ptr = NULL;
1470 ompt_data_t *parent_task_data;
Joachim Protzec5836064b2018-05-28 08:14:58 +00001471 omp_frame_t *ompt_frame;
Joachim Protze82e94a52017-11-01 10:08:30 +00001472 ompt_data_t *implicit_task_data;
1473 void *return_address = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001474
Joachim Protze82e94a52017-11-01 10:08:30 +00001475 if (ompt_enabled.enabled) {
1476 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1477 NULL, NULL);
1478 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001479 }
1480#endif
1481
Jim Cownie5e8470a2013-09-27 10:38:44 +00001482 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001483 level = parent_team->t.t_level;
1484 // used to launch non-serial teams even if nested is not allowed
1485 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001486#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001487 // needed to check nesting inside the teams
1488 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001489#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001490#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001491 p_hot_teams = &master_th->th.th_hot_teams;
1492 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1493 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1494 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1495 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001496 // it is either actual or not needed (when active_level > 0)
1497 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001498 }
1499#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001500
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001501#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001502 if (ompt_enabled.enabled) {
1503 if (ompt_enabled.ompt_callback_parallel_begin) {
1504 int team_size = master_set_numthreads
1505 ? master_set_numthreads
1506 : get__nproc_2(parent_team, master_tid);
1507 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1508 parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
1509 OMPT_INVOKER(call_context), return_address);
1510 }
1511 master_th->th.ompt_thread_info.state = omp_state_overhead;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001512 }
1513#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001514
Jim Cownie5e8470a2013-09-27 10:38:44 +00001515 master_th->th.th_ident = loc;
1516
1517#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001518 if (master_th->th.th_teams_microtask && ap &&
1519 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1520 // AC: This is start of parallel that is nested inside teams construct.
1521 // The team is actual (hot), all workers are ready at the fork barrier.
1522 // No lock needed to initialize the team a bit, then free workers.
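      // (Illustrative: this branch handles the inner "#pragma omp parallel"
      // of a "#pragma omp teams" region executing on the host.)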
1523 parent_team->t.t_ident = loc;
1524 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1525 parent_team->t.t_argc = argc;
1526 argv = (void **)parent_team->t.t_argv;
1527 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001528/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001529#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001530 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001531#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001532 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001533#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001534 // Increment our nested depth level, but do not increase the serialization
1535 if (parent_team == master_th->th.th_serial_team) {
1536 // AC: we are in serialized parallel
1537 __kmpc_serialized_parallel(loc, gtid);
1538 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1539 // AC: need this so that enquiry functions work
1540 // correctly; will restore at join time
1541 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001542#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001543 void *dummy;
1544 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001545
Jonathan Peyton30419822017-05-12 18:01:32 +00001546 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001547
Joachim Protze82e94a52017-11-01 10:08:30 +00001548 if (ompt_enabled.enabled) {
1549 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1550 &ompt_parallel_data, return_address);
Joachim Protzec255ca72017-11-05 14:11:10 +00001551 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001552
Joachim Protze82e94a52017-11-01 10:08:30 +00001553 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1554 // don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001555
Jonathan Peyton30419822017-05-12 18:01:32 +00001556 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001557 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1558 if (ompt_enabled.ompt_callback_implicit_task) {
1559 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1560 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1561 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001562 OMPT_CUR_TASK_INFO(master_th)
1563 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001564 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001565
Jonathan Peyton30419822017-05-12 18:01:32 +00001566 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001567 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001568 } else {
1569 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001570 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001571#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001572
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001573 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001574 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1575 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1576 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1577#if OMPT_SUPPORT
1578 ,
1579 exit_runtime_p
1580#endif
1581 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001582 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001583
Jonathan Peyton30419822017-05-12 18:01:32 +00001584#if OMPT_SUPPORT
1585 *exit_runtime_p = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001586 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00001587 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001588 if (ompt_enabled.ompt_callback_implicit_task) {
1589 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1590 ompt_scope_end, NULL, implicit_task_data, 1,
Joachim Protze9be9cf22018-05-07 12:42:21 +00001591 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00001592 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001593 __ompt_lw_taskteam_unlink(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001594
Joachim Protze82e94a52017-11-01 10:08:30 +00001595 if (ompt_enabled.ompt_callback_parallel_end) {
1596 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1597 OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
1598 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001599 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001600 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001601 }
1602#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001603 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001604 }
1605
1606 parent_team->t.t_pkfn = microtask;
Jonathan Peyton30419822017-05-12 18:01:32 +00001607 parent_team->t.t_invoke = invoker;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00001608 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00001609 parent_team->t.t_active_level++;
1610 parent_team->t.t_level++;
1611
1612 /* Change number of threads in the team if requested */
1613 if (master_set_numthreads) { // The parallel has num_threads clause
1614 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
1615 // AC: only can reduce number of threads dynamically, can't increase
1616 kmp_info_t **other_threads = parent_team->t.t_threads;
1617 parent_team->t.t_nproc = master_set_numthreads;
1618 for (i = 0; i < master_set_numthreads; ++i) {
1619 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1620 }
1621 // Keep extra threads hot in the team for possible next parallels
1622 }
1623 master_th->th.th_set_nproc = 0;
1624 }
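      // (Illustrative: a num_threads(n) clause on this nested parallel can
      // only shrink the already-forked team; a request larger than
      // th_teams_size.nth is ignored here.)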
1625
1626#if USE_DEBUGGER
1627 if (__kmp_debugging) { // Let debugger override number of threads.
1628 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001629 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001630 master_set_numthreads = nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001631 }
1632 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001633#endif
1634
1635 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1636 "master_th=%p, gtid=%d\n",
1637 root, parent_team, master_th, gtid));
1638 __kmp_internal_fork(loc, gtid, parent_team);
1639 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1640 "master_th=%p, gtid=%d\n",
1641 root, parent_team, master_th, gtid));
1642
1643 /* Invoke microtask for MASTER thread */
1644 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1645 parent_team->t.t_id, parent_team->t.t_pkfn));
1646
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001647 if (!parent_team->t.t_invoke(gtid)) {
1648 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jonathan Peyton30419822017-05-12 18:01:32 +00001649 }
1650 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1651 parent_team->t.t_id, parent_team->t.t_pkfn));
1652 KMP_MB(); /* Flush all pending memory write invalidates. */
1653
1654 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1655
1656 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001657 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001658#endif /* OMP_40_ENABLED */
1659
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001660#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001661 if (__kmp_tasking_mode != tskm_immediate_exec) {
1662 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1663 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001664 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001665#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001666
Jonathan Peyton30419822017-05-12 18:01:32 +00001667 if (parent_team->t.t_active_level >=
1668 master_th->th.th_current_task->td_icvs.max_active_levels) {
1669 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001670 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001671#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001672 int enter_teams = ((ap == NULL && active_level == 0) ||
1673 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001674#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001675 nthreads =
1676 master_set_numthreads
1677 ? master_set_numthreads
1678 : get__nproc_2(
1679 parent_team,
1680 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001681
Jonathan Peyton30419822017-05-12 18:01:32 +00001682 // Check if we need to take forkjoin lock? (no need for serialized
1683 // parallel out of teams construct). This code moved here from
1684 // __kmp_reserve_threads() to speedup nested serialized parallels.
1685 if (nthreads > 1) {
1686 if ((!get__nested(master_th) && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001687#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001688 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001689#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001690 )) ||
1691 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001692 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1693 " threads\n",
1694 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001695 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001696 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001697 }
1698 if (nthreads > 1) {
1699 /* determine how many new threads we can use */
1700 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001701 nthreads = __kmp_reserve_threads(
1702 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001703#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001704 /* AC: If we execute teams from parallel region (on host), then
1705 teams should be created but each can only have 1 thread if
1706 nesting is disabled. If teams called from serial region, then
1707 teams and their threads should be created regardless of the
1708 nesting setting. */
1709 ,
1710 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001711#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 );
1713 if (nthreads == 1) {
1714 // Free lock for single thread execution here; for multi-thread
1715 // execution it will be freed later after team of threads created
1716 // and initialized
1717 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001718 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001719 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001720 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001721 KMP_DEBUG_ASSERT(nthreads > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001722
Jonathan Peyton30419822017-05-12 18:01:32 +00001723 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001724 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001725
Jim Cownie5e8470a2013-09-27 10:38:44 +00001726 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001727 if (nthreads == 1) {
1728/* josh todo: hypothetical question: what do we do for OS X*? */
1729#if KMP_OS_LINUX && \
1730 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1731 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001732#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001733 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1734#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1735 KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001736
Jonathan Peyton30419822017-05-12 18:01:32 +00001737 KA_TRACE(20,
1738 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001739
Jonathan Peyton30419822017-05-12 18:01:32 +00001740 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001741
Jonathan Peyton30419822017-05-12 18:01:32 +00001742 if (call_context == fork_context_intel) {
1743 /* TODO this sucks, use the compiler itself to pass args! :) */
1744 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001745#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001746 if (!ap) {
1747 // revert change made in __kmpc_serialized_parallel()
1748 master_th->th.th_serial_team->t.t_level--;
1749// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001750
1751#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001752 void *dummy;
1753 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001754 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001755
Jonathan Peyton30419822017-05-12 18:01:32 +00001756 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001757
Joachim Protze82e94a52017-11-01 10:08:30 +00001758 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001759 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001760 &ompt_parallel_data, return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001761
Joachim Protze82e94a52017-11-01 10:08:30 +00001762 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1763 // don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001764
Joachim Protze82e94a52017-11-01 10:08:30 +00001765 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protzec255ca72017-11-05 14:11:10 +00001766 exit_runtime_p = &(task_info->frame.exit_frame);
Joachim Protze82e94a52017-11-01 10:08:30 +00001767 if (ompt_enabled.ompt_callback_implicit_task) {
1768 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1769 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1770 &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001771 OMPT_CUR_TASK_INFO(master_th)
1772 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001773 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001774
Jonathan Peyton30419822017-05-12 18:01:32 +00001775 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001776 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001777 } else {
1778 exit_runtime_p = &dummy;
1779 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001780#endif
1781
Jonathan Peyton30419822017-05-12 18:01:32 +00001782 {
1783 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1784 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1785 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1786 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001787#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001788 ,
1789 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001790#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001791 );
1792 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001793
1794#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001795 if (ompt_enabled.enabled) {
1796 exit_runtime_p = NULL;
1797 if (ompt_enabled.ompt_callback_implicit_task) {
1798 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1799 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze9be9cf22018-05-07 12:42:21 +00001800 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00001801 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001802
Jonathan Peyton30419822017-05-12 18:01:32 +00001803 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001804 if (ompt_enabled.ompt_callback_parallel_end) {
1805 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1806 OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
1807 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001808 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001809 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001810 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001811#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001812 } else if (microtask == (microtask_t)__kmp_teams_master) {
1813 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1814 master_th->th.th_serial_team);
1815 team = master_th->th.th_team;
1816 // team->t.t_pkfn = microtask;
1817 team->t.t_invoke = invoker;
1818 __kmp_alloc_argv_entries(argc, team, TRUE);
1819 team->t.t_argc = argc;
1820 argv = (void **)team->t.t_argv;
1821 if (ap) {
1822 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001823// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001824#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001825 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001826#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001827 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001828#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001829 } else {
1830 for (i = 0; i < argc; ++i)
1831 // Get args from parent team for teams construct
1832 argv[i] = parent_team->t.t_argv[i];
1833 }
1834 // AC: revert change made in __kmpc_serialized_parallel()
1835 // because initial code in teams should have level=0
1836 team->t.t_level--;
1837 // AC: call special invoker for outer "parallel" of teams construct
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00001838 invoker(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001839 } else {
1840#endif /* OMP_40_ENABLED */
1841 argv = args;
1842 for (i = argc - 1; i >= 0; --i)
1843// TODO: revert workaround for Intel(R) 64 tracker #96
1844#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1845 *argv++ = va_arg(*ap, void *);
1846#else
1847 *argv++ = va_arg(ap, void *);
1848#endif
1849 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001850
1851#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001852 void *dummy;
1853 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00001854 ompt_task_info_t *task_info;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001855
Jonathan Peyton30419822017-05-12 18:01:32 +00001856 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001857
Joachim Protze82e94a52017-11-01 10:08:30 +00001858 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001859 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
Joachim Protze82e94a52017-11-01 10:08:30 +00001860 &ompt_parallel_data, return_address);
1861 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1862 // don't use lw_taskteam after linking. content was swapped
1863 task_info = OMPT_CUR_TASK_INFO(master_th);
Joachim Protzec255ca72017-11-05 14:11:10 +00001864 exit_runtime_p = &(task_info->frame.exit_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001865
Jonathan Peyton30419822017-05-12 18:01:32 +00001866 /* OMPT implicit task begin */
Joachim Protze82e94a52017-11-01 10:08:30 +00001867 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1868 if (ompt_enabled.ompt_callback_implicit_task) {
1869 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1870 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1871 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00001872 OMPT_CUR_TASK_INFO(master_th)
1873 ->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001874 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001875
Jonathan Peyton30419822017-05-12 18:01:32 +00001876 /* OMPT state */
Joachim Protze82e94a52017-11-01 10:08:30 +00001877 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001878 } else {
1879 exit_runtime_p = &dummy;
1880 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001881#endif
1882
Jonathan Peyton30419822017-05-12 18:01:32 +00001883 {
1884 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1885 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1886 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001887#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001888 ,
1889 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001890#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001891 );
1892 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001893
1894#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001895 if (ompt_enabled.enabled) {
1896 *exit_runtime_p = NULL;
1897 if (ompt_enabled.ompt_callback_implicit_task) {
1898 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1899 ompt_scope_end, NULL, &(task_info->task_data), 1,
Joachim Protze9be9cf22018-05-07 12:42:21 +00001900 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00001901 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001902
Joachim Protze82e94a52017-11-01 10:08:30 +00001903 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
Jonathan Peyton30419822017-05-12 18:01:32 +00001904 __ompt_lw_taskteam_unlink(master_th);
Joachim Protze82e94a52017-11-01 10:08:30 +00001905 if (ompt_enabled.ompt_callback_parallel_end) {
1906 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1907 &ompt_parallel_data, parent_task_data,
1908 OMPT_INVOKER(call_context), return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001909 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001910 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00001911 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001912#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001913#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001915#endif /* OMP_40_ENABLED */
1916 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001917#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001918 ompt_lw_taskteam_t lwt;
1919 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1920 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001921
Joachim Protzec255ca72017-11-05 14:11:10 +00001922 lwt.ompt_task_info.frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00001923 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1924// don't use lw_taskteam after linking. content was swapped
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001925#endif
1926
Jonathan Peyton30419822017-05-12 18:01:32 +00001927 // we were called from GNU native code
1928 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001929 return FALSE;
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001930 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001931 KMP_ASSERT2(call_context < fork_context_last,
1932 "__kmp_fork_call: unknown fork_context parameter");
1933 }
1934
1935 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1936 KMP_MB();
1937 return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001938 }
1939
Jim Cownie5e8470a2013-09-27 10:38:44 +00001940 // GEH: only modify the executing flag in the case when not serialized
1941 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001942 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1943 "curtask=%p, curtask_max_aclevel=%d\n",
1944 parent_team->t.t_active_level, master_th,
1945 master_th->th.th_current_task,
1946 master_th->th.th_current_task->td_icvs.max_active_levels));
1947 // TODO: GEH - cannot do this assertion because root thread not set up as
1948 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00001949 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1950 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001951
1952#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001953 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954#endif /* OMP_40_ENABLED */
1955 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001956 /* Increment our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00001957 KMP_ATOMIC_INC(&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001958 }
1959
Jim Cownie5e8470a2013-09-27 10:38:44 +00001960 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001961 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001962 if ((level + 1 < __kmp_nested_nth.used) &&
1963 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1964 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1965 } else {
1966 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001967 }
1968
1969#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001970 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001971 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00001972 kmp_proc_bind_t proc_bind_icv =
1973 proc_bind_default; // proc_bind_default means don't update
1974 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1975 proc_bind = proc_bind_false;
1976 } else {
1977 if (proc_bind == proc_bind_default) {
1978 // No proc_bind clause specified; use current proc-bind-var for this
1979 // parallel region
1980 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1981 }
1982 /* else: The proc_bind policy was specified explicitly on parallel clause.
1983 This overrides proc-bind-var for this parallel region, but does not
1984 change proc-bind-var. */
1985 // Figure the value of proc-bind-var for the child threads.
1986 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1987 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1988 master_th->th.th_current_task->td_icvs.proc_bind)) {
1989 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1990 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001991 }
1992
Jim Cownie5e8470a2013-09-27 10:38:44 +00001993 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001994 master_th->th.th_set_proc_bind = proc_bind_default;
1995#endif /* OMP_40_ENABLED */
1996
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001997 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001999 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002000#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002001 ) {
2002 kmp_internal_control_t new_icvs;
2003 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2004 new_icvs.next = NULL;
2005 if (nthreads_icv > 0) {
2006 new_icvs.nproc = nthreads_icv;
2007 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002008
2009#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002010 if (proc_bind_icv != proc_bind_default) {
2011 new_icvs.proc_bind = proc_bind_icv;
2012 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013#endif /* OMP_40_ENABLED */
2014
Jonathan Peyton30419822017-05-12 18:01:32 +00002015 /* allocate a new parallel team */
2016 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2017 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002018#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002019 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002020#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002022 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002024 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002025 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002026 /* allocate a new parallel team */
2027 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2028 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002029#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002030 ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002031#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002032#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002033 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002035 &master_th->th.th_current_task->td_icvs,
2036 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002038 KF_TRACE(
2039 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002040
2041 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002042 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2043 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2044 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2045 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2046 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002047#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002048 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2049 return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002050#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002051 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
2052// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002053#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002054 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002055#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002056 int new_level = parent_team->t.t_level + 1;
2057 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2058 new_level = parent_team->t.t_active_level + 1;
2059 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060#if OMP_40_ENABLED
2061 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002062 // AC: Do not increase parallel level at start of the teams construct
2063 int new_level = parent_team->t.t_level;
2064 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2065 new_level = parent_team->t.t_active_level;
2066 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002067 }
2068#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002069 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002070 // set master's schedule as new run-time schedule
2071 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002072
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002073#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002074 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002075#endif
2076
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002077 // Update the floating point rounding in the team if required.
2078 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002079
Jonathan Peyton30419822017-05-12 18:01:32 +00002080 if (__kmp_tasking_mode != tskm_immediate_exec) {
2081 // Set master's task team to team's task team. Unless this is hot team, it
2082 // should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002083 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2084 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002085 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2086 "%p, new task_team %p / team %p\n",
2087 __kmp_gtid_from_thread(master_th),
2088 master_th->th.th_task_team, parent_team,
2089 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002090
Jonathan Peyton30419822017-05-12 18:01:32 +00002091 if (active_level || master_th->th.th_task_team) {
2092 // Take a memo of master's task_state
2093 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2094 if (master_th->th.th_task_state_top >=
2095 master_th->th.th_task_state_stack_sz) { // increase size
2096 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2097 kmp_uint8 *old_stack, *new_stack;
2098 kmp_uint32 i;
2099 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2100 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2101 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2102 }
2103 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2104 ++i) { // zero-init rest of stack
2105 new_stack[i] = 0;
2106 }
2107 old_stack = master_th->th.th_task_state_memo_stack;
2108 master_th->th.th_task_state_memo_stack = new_stack;
2109 master_th->th.th_task_state_stack_sz = new_size;
2110 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002111 }
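        // (The memo stack doubles in size when full, so pushing the master's
        // task_state below is amortized constant time.)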
Jonathan Peyton30419822017-05-12 18:01:32 +00002112 // Store master's task_state on stack
2113 master_th->th
2114 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2115 master_th->th.th_task_state;
2116 master_th->th.th_task_state_top++;
2117#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton642688b2017-06-01 16:46:36 +00002118 if (team == master_th->th.th_hot_teams[active_level].hot_team) {
2119 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002120 master_th->th.th_task_state =
2121 master_th->th
2122 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2123 } else {
2124#endif
2125 master_th->th.th_task_state = 0;
2126#if KMP_NESTED_HOT_TEAMS
2127 }
2128#endif
2129 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002130#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002131 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2132 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002133#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002135
Jonathan Peyton30419822017-05-12 18:01:32 +00002136 KA_TRACE(
2137 20,
2138 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2139 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2140 team->t.t_nproc));
2141 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2142 (team->t.t_master_tid == 0 &&
2143 (team->t.t_parent == root->r.r_root_team ||
2144 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002145 KMP_MB();
2146
2147 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002148 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002149#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002150 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002151#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002152 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002153// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002154#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002155 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002156#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002157 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002158#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002159 KMP_CHECK_UPDATE(*argv, new_argv);
2160 argv++;
2161 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002162#if OMP_40_ENABLED
2163 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002164 for (i = 0; i < argc; ++i) {
2165 // Get args from parent team for teams construct
2166 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2167 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002168 }
2169#endif /* OMP_40_ENABLED */
2170
2171 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002172 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002173 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002174 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002175
Jonathan Peyton30419822017-05-12 18:01:32 +00002176 __kmp_fork_team_threads(root, team, master_th, gtid);
2177 __kmp_setup_icv_copy(team, nthreads,
2178 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002179
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002180#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002181 master_th->th.ompt_thread_info.state = omp_state_work_parallel;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002182#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002183
Jonathan Peyton30419822017-05-12 18:01:32 +00002184 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002185
Jim Cownie5e8470a2013-09-27 10:38:44 +00002186#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002187 if (team->t.t_active_level == 1 // only report frames at level 1
2188#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002189 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002190#endif /* OMP_40_ENABLED */
2191 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002192#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002193 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2194 (__kmp_forkjoin_frames_mode == 3 ||
2195 __kmp_forkjoin_frames_mode == 1)) {
2196 kmp_uint64 tmp_time = 0;
2197 if (__itt_get_timestamp_ptr)
2198 tmp_time = __itt_get_timestamp();
2199 // Internal fork - report frame begin
2200 master_th->th.th_frame_time = tmp_time;
2201 if (__kmp_forkjoin_frames_mode == 3)
2202 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002203 } else
2204// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002205#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002206 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2207 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002208 // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002209 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2210 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002211 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002212#endif /* USE_ITT_BUILD */
2213
2214 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002215 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002216 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002217 KF_TRACE(10,
2218 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2219 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220
2221#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002222 if (__itt_stack_caller_create_ptr) {
2223 team->t.t_stack_id =
2224 __kmp_itt_stack_caller_create(); // create new stack stitching id
2225 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002226 }
2227#endif /* USE_ITT_BUILD */
2228
2229#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002230 // AC: skip __kmp_internal_fork at teams construct, let only master
2231 // threads execute
2232 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002233#endif /* OMP_40_ENABLED */
2234 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002235 __kmp_internal_fork(loc, gtid, team);
2236 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2237 "master_th=%p, gtid=%d\n",
2238 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002239 }
2240
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002241 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002242 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2243 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002244 }
2245
2246 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002247 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2248 team->t.t_id, team->t.t_pkfn));
2249 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002250
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00002251 if (!team->t.t_invoke(gtid)) {
2252 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jonathan Peyton30419822017-05-12 18:01:32 +00002253 }
2254 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2255 team->t.t_id, team->t.t_pkfn));
2256 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257
Jonathan Peyton30419822017-05-12 18:01:32 +00002258 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002259
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002260#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002261 if (ompt_enabled.enabled) {
2262 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002263 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002264#endif
2265
Jonathan Peyton30419822017-05-12 18:01:32 +00002266 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002267}
2268
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002269#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002270static inline void __kmp_join_restore_state(kmp_info_t *thread,
2271 kmp_team_t *team) {
2272 // restore state outside the region
2273 thread->th.ompt_thread_info.state =
Joachim Protze82e94a52017-11-01 10:08:30 +00002274 ((team->t.t_serialized) ? omp_state_work_serial
2275 : omp_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002276}
2277
Joachim Protze82e94a52017-11-01 10:08:30 +00002278static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2279 kmp_team_t *team, ompt_data_t *parallel_data,
2280 fork_context_e fork_context, void *codeptr) {
2281 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2282 if (ompt_enabled.ompt_callback_parallel_end) {
2283 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2284 parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
2285 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002286 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002287
Joachim Protzec255ca72017-11-05 14:11:10 +00002288 task_info->frame.enter_frame = NULL;
Jonathan Peyton30419822017-05-12 18:01:32 +00002289 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002290}
2291#endif
2292
Jonathan Peyton30419822017-05-12 18:01:32 +00002293void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002294#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002295 ,
2296 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002297#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002298#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002299 ,
2300 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002301#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002302 ) {
2303 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2304 kmp_team_t *team;
2305 kmp_team_t *parent_team;
2306 kmp_info_t *master_th;
2307 kmp_root_t *root;
2308 int master_active;
2309 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002310
Jonathan Peyton30419822017-05-12 18:01:32 +00002311 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002312
Jonathan Peyton30419822017-05-12 18:01:32 +00002313 /* setup current data */
2314 master_th = __kmp_threads[gtid];
2315 root = master_th->th.th_root;
2316 team = master_th->th.th_team;
2317 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318
Jonathan Peyton30419822017-05-12 18:01:32 +00002319 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002320
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002321#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002322 if (ompt_enabled.enabled) {
2323 master_th->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00002324 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002325#endif
2326
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002327#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002328 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2329 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2330 "th_task_team = %p\n",
2331 __kmp_gtid_from_thread(master_th), team,
2332 team->t.t_task_team[master_th->th.th_task_state],
2333 master_th->th.th_task_team));
2334 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2335 team->t.t_task_team[master_th->th.th_task_state]);
2336 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002337#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002338
Jonathan Peyton30419822017-05-12 18:01:32 +00002339 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002340#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002341 if (master_th->th.th_teams_microtask) {
2342 // We are in teams construct
2343 int level = team->t.t_level;
2344 int tlevel = master_th->th.th_teams_level;
2345 if (level == tlevel) {
2346 // AC: we haven't incremented it earlier at start of teams construct,
2347 // so do it here - at the end of teams construct
2348 team->t.t_level++;
2349 } else if (level == tlevel + 1) {
2350 // AC: we are exiting parallel inside teams, need to increment
2351 // serialization in order to restore it in the next call to
2352 // __kmpc_end_serialized_parallel
2353 team->t.t_serialized++;
2354 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002355 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002356#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002357 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002358
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002359#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002360 if (ompt_enabled.enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002361 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002362 }
2363#endif
2364
Jonathan Peyton30419822017-05-12 18:01:32 +00002365 return;
2366 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367
Jonathan Peyton30419822017-05-12 18:01:32 +00002368 master_active = team->t.t_master_active;
2369
2370#if OMP_40_ENABLED
2371 if (!exit_teams)
2372#endif /* OMP_40_ENABLED */
2373 {
2374 // AC: No barrier for internal teams at exit from teams construct.
2375 // But there is a barrier for the external team (league).
2376 __kmp_internal_join(loc, gtid, team);
2377 }
2378#if OMP_40_ENABLED
2379 else {
2380 master_th->th.th_task_state =
2381 0; // AC: no tasking in teams (out of any parallel)
2382 }
2383#endif /* OMP_40_ENABLED */
2384
2385 KMP_MB();
2386
2387#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002388 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2389 void *codeptr = team->t.ompt_team_info.master_return_address;
Jonathan Peyton30419822017-05-12 18:01:32 +00002390#endif
2391
2392#if USE_ITT_BUILD
2393 if (__itt_stack_caller_create_ptr) {
2394 __kmp_itt_stack_caller_destroy(
2395 (__itt_caller)team->t
2396 .t_stack_id); // destroy the stack stitching id after join barrier
2397 }
2398
Jonathan Peyton8c432f22018-01-04 22:56:47 +00002399 // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
Jonathan Peyton30419822017-05-12 18:01:32 +00002400 if (team->t.t_active_level == 1
2401#if OMP_40_ENABLED
2402 && !master_th->th.th_teams_microtask /* not in teams construct */
2403#endif /* OMP_40_ENABLED */
2404 ) {
2405 master_th->th.th_ident = loc;
2406 // only one notification scheme (either "submit" or "forking/joined", not
2407 // both)
2408 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2409 __kmp_forkjoin_frames_mode == 3)
2410 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2411 master_th->th.th_frame_time, 0, loc,
2412 master_th->th.th_team_nproc, 1);
2413 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2414 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2415 __kmp_itt_region_joined(gtid);
2416 } // active_level == 1
2417#endif /* USE_ITT_BUILD */
2418
2419#if OMP_40_ENABLED
2420 if (master_th->th.th_teams_microtask && !exit_teams &&
2421 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2422 team->t.t_level == master_th->th.th_teams_level + 1) {
2423 // AC: We need to leave the team structure intact at the end of a parallel
2424 // region inside the teams construct, so that the same (hot) team can be
2425 // reused by the next parallel region; only the nesting levels are adjusted.
2426
2427 /* Decrement our nested depth level */
2428 team->t.t_level--;
2429 team->t.t_active_level--;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002430 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002431
2432 /* Restore number of threads in the team if needed */
2433 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2434 int old_num = master_th->th.th_team_nproc;
2435 int new_num = master_th->th.th_teams_size.nth;
2436 kmp_info_t **other_threads = team->t.t_threads;
2437 team->t.t_nproc = new_num;
2438 for (i = 0; i < old_num; ++i) {
2439 other_threads[i]->th.th_team_nproc = new_num;
2440 }
2441 // Adjust states of the previously unused threads of the team
2442 for (i = old_num; i < new_num; ++i) {
2443 // Re-initialize thread's barrier data.
2444 int b;
2445 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2446 for (b = 0; b < bs_last_barrier; ++b) {
2447 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2448 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2449#if USE_DEBUGGER
2450 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2451#endif
2452 }
2453 if (__kmp_tasking_mode != tskm_immediate_exec) {
2454 // Synchronize thread's task state
2455 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2456 }
2457 }
2458 }
2459
2460#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002461 if (ompt_enabled.enabled) {
2462 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2463 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002464 }
2465#endif
2466
2467 return;
2468 }
2469#endif /* OMP_40_ENABLED */
2470
2471 /* do cleanup and restore the parent team */
2472 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2473 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2474
2475 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2476
2477 /* jc: The following lock has instructions with REL and ACQ semantics,
2478 separating the parallel user code called in this parallel region
2479 from the serial user code called after this function returns. */
2480 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2481
2482#if OMP_40_ENABLED
2483 if (!master_th->th.th_teams_microtask ||
2484 team->t.t_level > master_th->th.th_teams_level)
2485#endif /* OMP_40_ENABLED */
2486 {
2487 /* Decrement our nested depth level */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00002488 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
Jonathan Peyton30419822017-05-12 18:01:32 +00002489 }
2490 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2491
Joachim Protze82e94a52017-11-01 10:08:30 +00002492#if OMPT_SUPPORT
2493 if (ompt_enabled.enabled) {
2494 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2495 if (ompt_enabled.ompt_callback_implicit_task) {
2496 int ompt_team_size = team->t.t_nproc;
2497 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2498 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
Joachim Protze9be9cf22018-05-07 12:42:21 +00002499 OMPT_CUR_TASK_INFO(master_th)->thread_num);
Jonathan Peyton30419822017-05-12 18:01:32 +00002500 }
Joachim Protze82e94a52017-11-01 10:08:30 +00002501
Joachim Protzec255ca72017-11-05 14:11:10 +00002502 task_info->frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00002503 task_info->task_data = ompt_data_none;
Jonathan Peyton30419822017-05-12 18:01:32 +00002504 }
2505#endif
2506
2507 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2508 master_th, team));
2509 __kmp_pop_current_task_from_thread(master_th);
2510
2511#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2512 // Restore master thread's partition.
2513 master_th->th.th_first_place = team->t.t_first_place;
2514 master_th->th.th_last_place = team->t.t_last_place;
2515#endif /* OMP_40_ENABLED */
2516
2517 updateHWFPControl(team);
2518
2519 if (root->r.r_active != master_active)
2520 root->r.r_active = master_active;
2521
2522 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2523 master_th)); // this will free worker threads
2524
2525 /* This race was fun to find. Make sure the following assignment stays in
2526 the critical region; otherwise assertions may fail occasionally, since the
2527 old team may be reallocated and the hierarchy can appear inconsistent. It is
2528 actually safe to run without the lock and causes no bugs, only those
2529 assertion failures. It is one deref&assign, so keep it in the region. */
2530 master_th->th.th_team = parent_team;
2531 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2532 master_th->th.th_team_master = parent_team->t.t_threads[0];
2533 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2534
2535 /* restore serialized team, if need be */
2536 if (parent_team->t.t_serialized &&
2537 parent_team != master_th->th.th_serial_team &&
2538 parent_team != root->r.r_root_team) {
2539 __kmp_free_team(root,
2540 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2541 master_th->th.th_serial_team = parent_team;
2542 }
2543
2544 if (__kmp_tasking_mode != tskm_immediate_exec) {
2545 if (master_th->th.th_task_state_top >
2546 0) { // Restore task state from memo stack
2547 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2548 // Remember master's state if we re-use this nested hot team
2549 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2550 master_th->th.th_task_state;
2551 --master_th->th.th_task_state_top; // pop
2552 // Now restore state at this level
2553 master_th->th.th_task_state =
2554 master_th->th
2555 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2556 }
2557 // Copy the task team from the parent team to the master thread
2558 master_th->th.th_task_team =
2559 parent_team->t.t_task_team[master_th->th.th_task_state];
2560 KA_TRACE(20,
2561 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2562 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2563 parent_team));
2564 }
2565
2566 // TODO: GEH - cannot do this assertion because root thread not set up as
2567 // executing
2568 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2569 master_th->th.th_current_task->td_flags.executing = 1;
2570
2571 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2572
2573#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00002574 if (ompt_enabled.enabled) {
2575 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2576 codeptr);
Jonathan Peyton30419822017-05-12 18:01:32 +00002577 }
2578#endif
2579
2580 KMP_MB();
2581 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2582}
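
// Illustrative sketch, not part of the original source: __kmp_fork_call() and
// __kmp_join_call() are normally reached through the compiler entry point
// __kmpc_fork_call() (kmp_csupport.cpp). Assuming that entry point, a plain
// "#pragma omp parallel" lowers roughly as below; names prefixed with
// "example_" are made up for the sketch, and "loc" stands for the ident_t
// record the compiler would emit.
#if 0 // example only, never compiled
// Outlined region body: gtid and tid arrive by reference, followed by one
// argument per shared variable (argc == 1 here).
static void example_body(kmp_int32 *gtid, kmp_int32 *tid, int *shared_x) {
  // ... user code of the parallel region, run by every thread of the team ...
}

void example_caller(void) {
  int x = 0;
  static ident_t loc = {0}; // source-location record, normally compiler-emitted
  // Lowered form of "#pragma omp parallel shared(x)":
  __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)example_body, &x);
  // __kmpc_fork_call() drives __kmp_fork_call() for the encountering thread
  // and, once the microtask returns, __kmp_join_call() to resume serial code.
}
#endif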
Jim Cownie5e8470a2013-09-27 10:38:44 +00002583
2584/* Check whether we should push an internal control record onto the
2585 serial team stack. If so, do it. */
Jonathan Peyton30419822017-05-12 18:01:32 +00002586void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002587
Jonathan Peyton30419822017-05-12 18:01:32 +00002588 if (thread->th.th_team != thread->th.th_serial_team) {
2589 return;
2590 }
2591 if (thread->th.th_team->t.t_serialized > 1) {
2592 int push = 0;
2593
2594 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2595 push = 1;
2596 } else {
2597 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2598 thread->th.th_team->t.t_serialized) {
2599 push = 1;
2600 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002601 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002602 if (push) { /* push a record on the serial team's stack */
2603 kmp_internal_control_t *control =
2604 (kmp_internal_control_t *)__kmp_allocate(
2605 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002606
Jonathan Peyton30419822017-05-12 18:01:32 +00002607 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002608
Jonathan Peyton30419822017-05-12 18:01:32 +00002609 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002610
Jonathan Peyton30419822017-05-12 18:01:32 +00002611 control->next = thread->th.th_team->t.t_control_stack_top;
2612 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002613 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002614 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002615}
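
// Illustrative sketch, not part of the original source: the control stack
// above records ICVs per serialized nesting level, so a value changed inside
// a nested, serialized parallel region is restored when that level unwinds.
// The user-level code below is one way such a push can be reached; note that
// the push only happens once t_serialized exceeds 1 (i.e. at the second
// serialized level and deeper).
#if 0 // example only, never compiled
#include <omp.h>
void example_serialized_nesting(void) {
  omp_set_num_threads(4);
#pragma omp parallel if (0) // level 1: forced inactive (serialized) region
  {
#pragma omp parallel if (0) // level 2: t_serialized is now > 1
    {
      // __kmp_set_num_threads() calls __kmp_save_internal_controls() here,
      // pushing the current ICVs before applying the new value.
      omp_set_num_threads(2);
    }
    // Back at level 1 the pushed record has been restored; the inner
    // omp_set_num_threads(2) does not leak out of its region.
  }
}
#endif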
2616
2617/* Changes set_nproc */
Jonathan Peyton30419822017-05-12 18:01:32 +00002618void __kmp_set_num_threads(int new_nth, int gtid) {
2619 kmp_info_t *thread;
2620 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002621
Jonathan Peyton30419822017-05-12 18:01:32 +00002622 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2623 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002624
Jonathan Peyton30419822017-05-12 18:01:32 +00002625 if (new_nth < 1)
2626 new_nth = 1;
2627 else if (new_nth > __kmp_max_nth)
2628 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629
Jonathan Peyton30419822017-05-12 18:01:32 +00002630 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2631 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002632
Jonathan Peyton30419822017-05-12 18:01:32 +00002633 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002634
Jonathan Peyton30419822017-05-12 18:01:32 +00002635 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002636
Jonathan Peyton30419822017-05-12 18:01:32 +00002637 // If this omp_set_num_threads() call will cause the hot team size to be
2638 // reduced (in the absence of a num_threads clause), then reduce it now,
2639 // rather than waiting for the next parallel region.
2640 root = thread->th.th_root;
2641 if (__kmp_init_parallel && (!root->r.r_active) &&
2642 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002643#if KMP_NESTED_HOT_TEAMS
2644 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2645#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002646 ) {
2647 kmp_team_t *hot_team = root->r.r_hot_team;
2648 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002649
Jonathan Peyton30419822017-05-12 18:01:32 +00002650 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002651
Jonathan Peyton30419822017-05-12 18:01:32 +00002652 // Release the extra threads we don't need any more.
2653 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2654 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2655 if (__kmp_tasking_mode != tskm_immediate_exec) {
2656 // When decreasing team size, threads no longer in the team should unref
2657 // task team.
2658 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2659 }
2660 __kmp_free_thread(hot_team->t.t_threads[f]);
2661 hot_team->t.t_threads[f] = NULL;
2662 }
2663 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002664#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002665 if (thread->th.th_hot_teams) {
2666 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2667 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2668 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002669#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002670
Jonathan Peyton30419822017-05-12 18:01:32 +00002671 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002672
Jonathan Peyton30419822017-05-12 18:01:32 +00002673 // Update the t_nproc field in the threads that are still active.
2674 for (f = 0; f < new_nth; f++) {
2675 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2676 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002677 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002678 // Special flag in case omp_set_num_threads() call
2679 hot_team->t.t_size_changed = -1;
2680 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002681}
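
// Illustrative sketch, not part of the original source: when the new value is
// smaller than the current hot-team size and no parallel region is active,
// the code above trims the hot team immediately instead of waiting for the
// next fork (assuming the default hot-teams settings).
#if 0 // example only, never compiled
#include <omp.h>
void example_shrink_hot_team(void) {
#pragma omp parallel num_threads(8)
  { /* the root's hot team now holds 8 threads */ }

  // __kmp_set_num_threads() releases hot-team threads 2..7 right away and
  // marks the resize with t_size_changed = -1.
  omp_set_num_threads(2);

#pragma omp parallel
  { /* the next region starts from the already-trimmed hot team */ }
}
#endif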
2682
Jim Cownie5e8470a2013-09-27 10:38:44 +00002683/* Changes max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002684void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2685 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002686
Jonathan Peyton30419822017-05-12 18:01:32 +00002687 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2688 "%d = (%d)\n",
2689 gtid, max_active_levels));
2690 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002691
Jonathan Peyton30419822017-05-12 18:01:32 +00002692 // validate max_active_levels
2693 if (max_active_levels < 0) {
2694 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2695 // We ignore this call if the user has specified a negative value.
2696 // The current setting won't be changed. The last valid setting will be
2697 // used. A warning will be issued (if warnings are allowed as controlled by
2698 // the KMP_WARNINGS env var).
2699 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2700 "max_active_levels for thread %d = (%d)\n",
2701 gtid, max_active_levels));
2702 return;
2703 }
2704 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2705 // it's OK, the max_active_levels is within the valid range: [ 0;
2706 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2707 // We allow a zero value. (implementation defined behavior)
2708 } else {
2709 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2710 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2711 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2712 // Current upper limit is MAX_INT. (implementation defined behavior)
2713 // If the input exceeds the upper limit, we correct the input to be the
2714 // upper limit. (implementation defined behavior)
2715 // Actually, the flow should never get here until we use MAX_INT limit.
2716 }
2717 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2718 "max_active_levels for thread %d = (%d)\n",
2719 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002720
Jonathan Peyton30419822017-05-12 18:01:32 +00002721 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002722
Jonathan Peyton30419822017-05-12 18:01:32 +00002723 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002724
Jonathan Peyton30419822017-05-12 18:01:32 +00002725 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002726}
2727
2728/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002729int __kmp_get_max_active_levels(int gtid) {
2730 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002731
Jonathan Peyton30419822017-05-12 18:01:32 +00002732 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2733 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002734
Jonathan Peyton30419822017-05-12 18:01:32 +00002735 thread = __kmp_threads[gtid];
2736 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2737 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2738 "curtask_maxaclevel=%d\n",
2739 gtid, thread->th.th_current_task,
2740 thread->th.th_current_task->td_icvs.max_active_levels));
2741 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002742}
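
// Illustrative sketch, not part of the original source: the validation above
// ignores negative requests (with a warning) and would clamp values above
// KMP_MAX_ACTIVE_LEVELS_LIMIT, although with the current INT_MAX limit that
// branch is effectively unreachable. Seen from user code:
#if 0 // example only, never compiled
#include <omp.h>
void example_max_active_levels(void) {
  omp_set_max_active_levels(4); // accepted; stored in this task's ICVs
  omp_set_max_active_levels(-1); // warning issued; previous value (4) kept
  int n = omp_get_max_active_levels(); // still 4
  (void)n;
}
#endif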
2743
2744/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
Jonathan Peyton30419822017-05-12 18:01:32 +00002745void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2746 kmp_info_t *thread;
2747 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002748
Jonathan Peyton30419822017-05-12 18:01:32 +00002749 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2750 gtid, (int)kind, chunk));
2751 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002752
Jonathan Peyton30419822017-05-12 18:01:32 +00002753 // Check if the kind parameter is valid, correct if needed.
2754 // Valid parameters should fit in one of two intervals - standard or extended:
2755 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2756 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2757 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2758 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2759 // TODO: Hint needs attention in case we change the default schedule.
2760 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2761 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2762 __kmp_msg_null);
2763 kind = kmp_sched_default;
2764 chunk = 0; // ignore chunk value in case of bad kind
2765 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002766
Jonathan Peyton30419822017-05-12 18:01:32 +00002767 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002768
Jonathan Peyton30419822017-05-12 18:01:32 +00002769 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002770
Jonathan Peyton30419822017-05-12 18:01:32 +00002771 if (kind < kmp_sched_upper_std) {
2772 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2773 // differentiate static chunked vs. unchunked: chunk should be invalid to
2774 // indicate unchunked schedule (which is the default)
2775 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002776 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002777 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2778 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002780 } else {
2781 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2782 // kmp_sched_lower - 2 ];
2783 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2784 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2785 kmp_sched_lower - 2];
2786 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002787 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002788 // ignore parameter chunk for schedule auto
2789 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2790 } else {
2791 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2792 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002793}
2794
2795/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002796void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2797 kmp_info_t *thread;
2798 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002799
Jonathan Peyton30419822017-05-12 18:01:32 +00002800 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2801 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002802
Jonathan Peyton30419822017-05-12 18:01:32 +00002803 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002804
Jonathan Peyton30419822017-05-12 18:01:32 +00002805 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002806
Jonathan Peyton30419822017-05-12 18:01:32 +00002807 switch (th_type) {
2808 case kmp_sch_static:
2809 case kmp_sch_static_greedy:
2810 case kmp_sch_static_balanced:
2811 *kind = kmp_sched_static;
2812 *chunk = 0; // chunk was not set, try to show this fact via zero value
2813 return;
2814 case kmp_sch_static_chunked:
2815 *kind = kmp_sched_static;
2816 break;
2817 case kmp_sch_dynamic_chunked:
2818 *kind = kmp_sched_dynamic;
2819 break;
2820 case kmp_sch_guided_chunked:
2821 case kmp_sch_guided_iterative_chunked:
2822 case kmp_sch_guided_analytical_chunked:
2823 *kind = kmp_sched_guided;
2824 break;
2825 case kmp_sch_auto:
2826 *kind = kmp_sched_auto;
2827 break;
2828 case kmp_sch_trapezoidal:
2829 *kind = kmp_sched_trapezoidal;
2830 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002831#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002832 case kmp_sch_static_steal:
2833 *kind = kmp_sched_static_steal;
2834 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002835#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002836 default:
2837 KMP_FATAL(UnknownSchedulingType, th_type);
2838 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002839
Jonathan Peyton30419822017-05-12 18:01:32 +00002840 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002841}
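
// Illustrative sketch, not part of the original source: __kmp_set_schedule()
// maps the portable kmp_sched_t kinds onto internal sched_type values and
// __kmp_get_schedule() maps them back; a returned chunk of 0 stands for the
// default, unchunked schedule. Through the user-level API this looks like:
#if 0 // example only, never compiled
#include <omp.h>
void example_runtime_schedule(void) {
  omp_sched_t kind;
  int chunk;

  omp_set_schedule(omp_sched_dynamic, 16); // stored as kmp_sch_dynamic_chunked
  omp_get_schedule(&kind, &chunk); // kind == omp_sched_dynamic, chunk == 16

  omp_set_schedule(omp_sched_static, 0); // chunk < 1: unchunked static
  omp_get_schedule(&kind, &chunk); // kind == omp_sched_static, chunk == 0
}
#endif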
2842
Jonathan Peyton30419822017-05-12 18:01:32 +00002843int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002844
Jonathan Peyton30419822017-05-12 18:01:32 +00002845 int ii, dd;
2846 kmp_team_t *team;
2847 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002848
Jonathan Peyton30419822017-05-12 18:01:32 +00002849 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2850 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002851
Jonathan Peyton30419822017-05-12 18:01:32 +00002852 // validate level
2853 if (level == 0)
2854 return 0;
2855 if (level < 0)
2856 return -1;
2857 thr = __kmp_threads[gtid];
2858 team = thr->th.th_team;
2859 ii = team->t.t_level;
2860 if (level > ii)
2861 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002862
2863#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002864 if (thr->th.th_teams_microtask) {
2865 // AC: we are in teams region where multiple nested teams have same level
2866 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2867 if (level <=
2868 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2869 KMP_DEBUG_ASSERT(ii >= tlevel);
2870 // AC: As we need to pass by the teams league, we need to artificially
2871 // increase ii
2872 if (ii == tlevel) {
2873 ii += 2; // three teams have same level
2874 } else {
2875 ii++; // two teams have same level
2876 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002877 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002878 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002879#endif
2880
Jonathan Peyton30419822017-05-12 18:01:32 +00002881 if (ii == level)
2882 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002883
Jonathan Peyton30419822017-05-12 18:01:32 +00002884 dd = team->t.t_serialized;
2885 level++;
2886 while (ii > level) {
2887 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002888 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002889 if ((team->t.t_serialized) && (!dd)) {
2890 team = team->t.t_parent;
2891 continue;
2892 }
2893 if (ii > level) {
2894 team = team->t.t_parent;
2895 dd = team->t.t_serialized;
2896 ii--;
2897 }
2898 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002899
Jonathan Peyton30419822017-05-12 18:01:32 +00002900 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002901}
2902
Jonathan Peyton30419822017-05-12 18:01:32 +00002903int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002904
Jonathan Peyton30419822017-05-12 18:01:32 +00002905 int ii, dd;
2906 kmp_team_t *team;
2907 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002908
Jonathan Peyton30419822017-05-12 18:01:32 +00002909 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2910 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002911
Jonathan Peyton30419822017-05-12 18:01:32 +00002912 // validate level
2913 if (level == 0)
2914 return 1;
2915 if (level < 0)
2916 return -1;
2917 thr = __kmp_threads[gtid];
2918 team = thr->th.th_team;
2919 ii = team->t.t_level;
2920 if (level > ii)
2921 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002922
2923#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002924 if (thr->th.th_teams_microtask) {
2925 // AC: we are in teams region where multiple nested teams have same level
2926 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2927 if (level <=
2928 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2929 KMP_DEBUG_ASSERT(ii >= tlevel);
2930 // AC: As we need to pass by the teams league, we need to artificially
2931 // increase ii
2932 if (ii == tlevel) {
2933 ii += 2; // three teams have same level
2934 } else {
2935 ii++; // two teams have same level
2936 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002937 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002938 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002939#endif
2940
Jonathan Peyton30419822017-05-12 18:01:32 +00002941 while (ii > level) {
2942 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002943 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002944 if (team->t.t_serialized && (!dd)) {
2945 team = team->t.t_parent;
2946 continue;
2947 }
2948 if (ii > level) {
2949 team = team->t.t_parent;
2950 ii--;
2951 }
2952 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002953
Jonathan Peyton30419822017-05-12 18:01:32 +00002954 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002955}
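
// Illustrative sketch, not part of the original source: the two walkers above
// back the user-level queries omp_get_ancestor_thread_num() and
// omp_get_team_size(); both step through t_parent, skipping serialized levels
// via t_serialized. In a two-level nested region the expected answers are:
#if 0 // example only, never compiled
#include <omp.h>
void example_ancestry(void) {
  omp_set_nested(1);
#pragma omp parallel num_threads(2) // level 1
  {
#pragma omp parallel num_threads(3) // level 2
    {
      int a0 = omp_get_ancestor_thread_num(0); // 0 (initial thread)
      int s0 = omp_get_team_size(0); // 1
      int a1 = omp_get_ancestor_thread_num(1); // tid, within the level-1 team,
                                               // of this thread's ancestor
      int s1 = omp_get_team_size(1); // 2
      int a2 = omp_get_ancestor_thread_num(2); // this thread's own tid
      int s2 = omp_get_team_size(2); // 3
      (void)a0; (void)s0; (void)a1; (void)s1; (void)a2; (void)s2;
    }
  }
}
#endif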
2956
Jonathan Peyton30419822017-05-12 18:01:32 +00002957kmp_r_sched_t __kmp_get_schedule_global() {
2958 // This routine created because pairs (__kmp_sched, __kmp_chunk) and
2959 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
2960 // independently. So one can get the updated schedule here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002961
Jonathan Peyton30419822017-05-12 18:01:32 +00002962 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002963
Jonathan Peyton30419822017-05-12 18:01:32 +00002964 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
2965 // __kmp_guided. __kmp_sched should keep original value, so that user can set
2966 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
2967 // different roots (even in OMP 2.5)
2968 if (__kmp_sched == kmp_sch_static) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002969 // replace STATIC with more detailed schedule (balanced or greedy)
2970 r_sched.r_sched_type = __kmp_static;
Jonathan Peyton30419822017-05-12 18:01:32 +00002971 } else if (__kmp_sched == kmp_sch_guided_chunked) {
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002972 // replace GUIDED with more detailed schedule (iterative or analytical)
2973 r_sched.r_sched_type = __kmp_guided;
2974 } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2975 r_sched.r_sched_type = __kmp_sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00002976 }
2977
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00002978 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
2979 // __kmp_chunk may be wrong here (if it was not ever set)
Jonathan Peyton30419822017-05-12 18:01:32 +00002980 r_sched.chunk = KMP_DEFAULT_CHUNK;
2981 } else {
2982 r_sched.chunk = __kmp_chunk;
2983 }
2984
2985 return r_sched;
2986}
2987
2988/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2989 at least argc number of *t_argv entries for the requested team. */
2990static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
2991
2992 KMP_DEBUG_ASSERT(team);
2993 if (!realloc || argc > team->t.t_max_argc) {
2994
2995 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
2996 "current entries=%d\n",
2997 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
2998 /* if previously allocated heap space for args, free them */
2999 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3000 __kmp_free((void *)team->t.t_argv);
3001
3002 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3003 /* use unused space in the cache line for arguments */
3004 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3005 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3006 "argv entries\n",
3007 team->t.t_id, team->t.t_max_argc));
3008 team->t.t_argv = &team->t.t_inline_argv[0];
3009 if (__kmp_storage_map) {
3010 __kmp_print_storage_map_gtid(
3011 -1, &team->t.t_inline_argv[0],
3012 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3013 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3014 team->t.t_id);
3015 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00003017 /* allocate space for arguments in the heap */
3018 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3019 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3020 : 2 * argc;
3021 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3022 "argv entries\n",
3023 team->t.t_id, team->t.t_max_argc));
3024 team->t.t_argv =
3025 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3026 if (__kmp_storage_map) {
3027 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3028 &team->t.t_argv[team->t.t_max_argc],
3029 sizeof(void *) * team->t.t_max_argc,
3030 "team_%d.t_argv", team->t.t_id);
3031 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003032 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003033 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003034}
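
// Illustrative sketch, not part of the original source: a compact restatement
// of the argv sizing policy above -- small argument lists reuse the team's
// inline buffer, larger ones get a heap block of at least
// KMP_MIN_MALLOC_ARGV_ENTRIES entries, or 2*argc once that would be bigger.
#if 0 // example only, never compiled
static int example_argv_capacity(int argc) {
  if (argc <= KMP_INLINE_ARGV_ENTRIES)
    return KMP_INLINE_ARGV_ENTRIES; // spare space inside the team structure
  return (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
             ? KMP_MIN_MALLOC_ARGV_ENTRIES // modest heap block
             : 2 * argc; // leave headroom so later forks rarely reallocate
}
#endif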
3035
Jonathan Peyton30419822017-05-12 18:01:32 +00003036static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3037 int i;
3038 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3039 team->t.t_threads =
3040 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3041 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3042 sizeof(dispatch_shared_info_t) * num_disp_buff);
3043 team->t.t_dispatch =
3044 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3045 team->t.t_implicit_task_taskdata =
3046 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3047 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003048
Jonathan Peyton30419822017-05-12 18:01:32 +00003049 /* setup dispatch buffers */
3050 for (i = 0; i < num_disp_buff; ++i) {
3051 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003052#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003053 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003054#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003055 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003056}
3057
Jonathan Peyton30419822017-05-12 18:01:32 +00003058static void __kmp_free_team_arrays(kmp_team_t *team) {
3059 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3060 int i;
3061 for (i = 0; i < team->t.t_max_nproc; ++i) {
3062 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3063 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3064 team->t.t_dispatch[i].th_disp_buffer = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003065 }
3066 }
Jonathan Peytonf6399362018-07-09 17:51:13 +00003067#if KMP_USE_HIER_SCHED
3068 __kmp_dispatch_free_hierarchies(team);
3069#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003070 __kmp_free(team->t.t_threads);
3071 __kmp_free(team->t.t_disp_buffer);
3072 __kmp_free(team->t.t_dispatch);
3073 __kmp_free(team->t.t_implicit_task_taskdata);
3074 team->t.t_threads = NULL;
3075 team->t.t_disp_buffer = NULL;
3076 team->t.t_dispatch = NULL;
3077 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003078}
3079
Jonathan Peyton30419822017-05-12 18:01:32 +00003080static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3081 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003082
Jonathan Peyton30419822017-05-12 18:01:32 +00003083 __kmp_free(team->t.t_disp_buffer);
3084 __kmp_free(team->t.t_dispatch);
3085 __kmp_free(team->t.t_implicit_task_taskdata);
3086 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003087
Jonathan Peyton30419822017-05-12 18:01:32 +00003088 KMP_MEMCPY(team->t.t_threads, oldThreads,
3089 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003090
Jonathan Peyton30419822017-05-12 18:01:32 +00003091 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003092}
3093
Jonathan Peyton30419822017-05-12 18:01:32 +00003094static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003095
Jonathan Peyton30419822017-05-12 18:01:32 +00003096 kmp_r_sched_t r_sched =
3097 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003098
3099#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003100 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003101#endif /* OMP_40_ENABLED */
3102
Jonathan Peyton30419822017-05-12 18:01:32 +00003103 kmp_internal_control_t g_icvs = {
3104 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3105 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
3106 // for nested parallelism (per thread)
3107 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3108 // adjustment of threads (per thread)
3109 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3110 // whether blocktime is explicitly set
3111 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003112#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003113 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3114// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003115#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003116 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3117 // next parallel region (per thread)
3118 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3119 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3120 // for max_active_levels
3121 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3122// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003123#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003124 __kmp_nested_proc_bind.bind_types[0],
3125 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003126#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003127 NULL // struct kmp_internal_control *next;
3128 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003129
Jonathan Peyton30419822017-05-12 18:01:32 +00003130 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131}
3132
Jonathan Peyton30419822017-05-12 18:01:32 +00003133static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003134
Jonathan Peyton30419822017-05-12 18:01:32 +00003135 kmp_internal_control_t gx_icvs;
3136 gx_icvs.serial_nesting_level =
3137 0; // probably =team->t.t_serial like in save_inter_controls
3138 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3139 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003140
Jonathan Peyton30419822017-05-12 18:01:32 +00003141 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003142}
3143
Jonathan Peyton30419822017-05-12 18:01:32 +00003144static void __kmp_initialize_root(kmp_root_t *root) {
3145 int f;
3146 kmp_team_t *root_team;
3147 kmp_team_t *hot_team;
3148 int hot_team_max_nth;
3149 kmp_r_sched_t r_sched =
3150 __kmp_get_schedule_global(); // get current state of scheduling globals
3151 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3152 KMP_DEBUG_ASSERT(root);
3153 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003154
Jonathan Peyton30419822017-05-12 18:01:32 +00003155 /* setup the root state structure */
3156 __kmp_init_lock(&root->r.r_begin_lock);
3157 root->r.r_begin = FALSE;
3158 root->r.r_active = FALSE;
3159 root->r.r_in_parallel = 0;
3160 root->r.r_blocktime = __kmp_dflt_blocktime;
3161 root->r.r_nested = __kmp_dflt_nested;
Jonathan Peytonf4392462017-07-27 20:58:41 +00003162 root->r.r_cg_nthreads = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003163
Jonathan Peyton30419822017-05-12 18:01:32 +00003164 /* setup the root team for this task */
3165 /* allocate the root team structure */
3166 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003167
Jonathan Peyton30419822017-05-12 18:01:32 +00003168 root_team =
3169 __kmp_allocate_team(root,
3170 1, // new_nproc
3171 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003172#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003173 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003174#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003175#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003176 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003177#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003178 &r_icvs,
3179 0 // argc
3180 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3181 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003182#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003183 // Non-NULL value should be assigned to make the debugger display the root
3184 // team.
3185 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003186#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003187
Jonathan Peyton30419822017-05-12 18:01:32 +00003188 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003189
Jonathan Peyton30419822017-05-12 18:01:32 +00003190 root->r.r_root_team = root_team;
3191 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003192
Jonathan Peyton30419822017-05-12 18:01:32 +00003193 /* initialize root team */
3194 root_team->t.t_threads[0] = NULL;
3195 root_team->t.t_nproc = 1;
3196 root_team->t.t_serialized = 1;
3197 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003198 root_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003199 KA_TRACE(
3200 20,
3201 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3202 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003203
Jonathan Peyton30419822017-05-12 18:01:32 +00003204 /* setup the hot team for this task */
3205 /* allocate the hot team structure */
3206 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003207
Jonathan Peyton30419822017-05-12 18:01:32 +00003208 hot_team =
3209 __kmp_allocate_team(root,
3210 1, // new_nproc
3211 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003212#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003213 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003214#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003215#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003216 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003217#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003218 &r_icvs,
3219 0 // argc
3220 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3221 );
3222 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003223
Jonathan Peyton30419822017-05-12 18:01:32 +00003224 root->r.r_hot_team = hot_team;
3225 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003226
Jonathan Peyton30419822017-05-12 18:01:32 +00003227 /* first-time initialization */
3228 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003229
Jonathan Peyton30419822017-05-12 18:01:32 +00003230 /* initialize hot team */
3231 hot_team_max_nth = hot_team->t.t_max_nproc;
3232 for (f = 0; f < hot_team_max_nth; ++f) {
3233 hot_team->t.t_threads[f] = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003234 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003235 hot_team->t.t_nproc = 1;
3236 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00003237 hot_team->t.t_sched.sched = r_sched.sched;
Jonathan Peyton30419822017-05-12 18:01:32 +00003238 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003239}
3240
3241#ifdef KMP_DEBUG
3242
Jim Cownie5e8470a2013-09-27 10:38:44 +00003243typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003244 kmp_team_p const *entry;
3245 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003246} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003247typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003248
Jonathan Peyton30419822017-05-12 18:01:32 +00003249static void __kmp_print_structure_team_accum( // Add team to list of teams.
3250 kmp_team_list_t list, // List of teams.
3251 kmp_team_p const *team // Team to add.
3252 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003253
Jonathan Peyton30419822017-05-12 18:01:32 +00003254 // List must terminate with item where both entry and next are NULL.
3255 // Team is added to the list only once.
3256 // List is sorted in ascending order by team id.
3257 // Team id is *not* a key.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003258
Jonathan Peyton30419822017-05-12 18:01:32 +00003259 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003260
Jonathan Peyton30419822017-05-12 18:01:32 +00003261 KMP_DEBUG_ASSERT(list != NULL);
3262 if (team == NULL) {
3263 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003264 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003265
Jonathan Peyton30419822017-05-12 18:01:32 +00003266 __kmp_print_structure_team_accum(list, team->t.t_parent);
3267 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003268
Jonathan Peyton30419822017-05-12 18:01:32 +00003269 // Search list for the team.
3270 l = list;
3271 while (l->next != NULL && l->entry != team) {
3272 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003273 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003274 if (l->next != NULL) {
3275 return; // Team has been added before, exit.
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003276 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003277
Jonathan Peyton30419822017-05-12 18:01:32 +00003278 // Team is not found. Search list again for insertion point.
3279 l = list;
3280 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3281 l = l->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003282 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003283
Jonathan Peyton30419822017-05-12 18:01:32 +00003284 // Insert team.
3285 {
3286 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3287 sizeof(kmp_team_list_item_t));
3288 *item = *l;
3289 l->entry = team;
3290 l->next = item;
3291 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003292}
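
// Illustrative sketch, not part of the original source: the accumulator above
// keeps a sorted, sentinel-terminated singly-linked list -- the terminator has
// entry == NULL and next == NULL, and insertion copies the displaced item into
// the freshly allocated node so no "previous" pointer is needed. The original
// also skips duplicates first; the bare insertion idiom looks like this:
#if 0 // example only, never compiled
typedef struct example_node {
  int key; // stands in for team->t.t_id
  struct example_node *next; // the list ends with a node whose next == NULL
} example_node_t;

static void example_sorted_insert(example_node_t *list, int key,
                                  example_node_t *fresh) {
  example_node_t *l = list;
  while (l->next != NULL && l->key <= key)
    l = l->next; // stop before the first larger key, or at the sentinel
  *fresh = *l; // move the displaced item (possibly the sentinel) aside
  l->key = key; // reuse the slot in place for the new key
  l->next = fresh; // relink: the list stays sorted and sentinel-terminated
}
#endif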
3293
Jonathan Peyton30419822017-05-12 18:01:32 +00003294static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
Jim Cownie5e8470a2013-09-27 10:38:44 +00003295
Jonathan Peyton30419822017-05-12 18:01:32 +00003296 ) {
3297 __kmp_printf("%s", title);
3298 if (team != NULL) {
3299 __kmp_printf("%2x %p\n", team->t.t_id, team);
3300 } else {
3301 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003302 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003303}
3304
Jonathan Peyton30419822017-05-12 18:01:32 +00003305static void __kmp_print_structure_thread(char const *title,
3306 kmp_info_p const *thread) {
3307 __kmp_printf("%s", title);
3308 if (thread != NULL) {
3309 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3310 } else {
3311 __kmp_printf(" - (nil)\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003312 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003313}
3314
Jonathan Peyton30419822017-05-12 18:01:32 +00003315void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003316
Jonathan Peyton30419822017-05-12 18:01:32 +00003317 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003318
Jonathan Peyton30419822017-05-12 18:01:32 +00003319 // Initialize list of teams.
3320 list =
3321 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3322 list->entry = NULL;
3323 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003324
Jonathan Peyton30419822017-05-12 18:01:32 +00003325 __kmp_printf("\n------------------------------\nGlobal Thread "
3326 "Table\n------------------------------\n");
3327 {
3328 int gtid;
3329 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3330 __kmp_printf("%2d", gtid);
3331 if (__kmp_threads != NULL) {
3332 __kmp_printf(" %p", __kmp_threads[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003333 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003334 if (__kmp_root != NULL) {
3335 __kmp_printf(" %p", __kmp_root[gtid]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003336 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003337 __kmp_printf("\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003338 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003339 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003340
Jonathan Peyton30419822017-05-12 18:01:32 +00003341 // Print out __kmp_threads array.
3342 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3343 "----------\n");
3344 if (__kmp_threads != NULL) {
3345 int gtid;
3346 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3347 kmp_info_t const *thread = __kmp_threads[gtid];
3348 if (thread != NULL) {
3349 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3350 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3351 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3352 __kmp_print_structure_team(" Serial Team: ",
3353 thread->th.th_serial_team);
3354 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3355 __kmp_print_structure_thread(" Master: ",
3356 thread->th.th_team_master);
3357 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3358 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003359#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003360 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003361#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003362 __kmp_print_structure_thread(" Next in pool: ",
3363 thread->th.th_next_pool);
3364 __kmp_printf("\n");
3365 __kmp_print_structure_team_accum(list, thread->th.th_team);
3366 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003367 }
3368 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003369 } else {
3370 __kmp_printf("Threads array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003371 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003372
Jonathan Peyton30419822017-05-12 18:01:32 +00003373 // Print out __kmp_root array.
3374 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3375 "--------\n");
3376 if (__kmp_root != NULL) {
3377 int gtid;
3378 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3379 kmp_root_t const *root = __kmp_root[gtid];
3380 if (root != NULL) {
3381 __kmp_printf("GTID %2d %p:\n", gtid, root);
3382 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3383 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3384 __kmp_print_structure_thread(" Uber Thread: ",
3385 root->r.r_uber_thread);
3386 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3387 __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
Jonathan Peyton61d44f12018-07-09 18:09:25 +00003388 __kmp_printf(" In Parallel: %2d\n",
3389 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
Jonathan Peyton30419822017-05-12 18:01:32 +00003390 __kmp_printf("\n");
3391 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3392 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003393 }
3394 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003395 } else {
3396 __kmp_printf("Ubers array is not allocated.\n");
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003397 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003398
Jonathan Peyton30419822017-05-12 18:01:32 +00003399 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3400 "--------\n");
3401 while (list->next != NULL) {
3402 kmp_team_p const *team = list->entry;
3403 int i;
3404 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3405 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3406 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3407 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3408 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3409 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3410 for (i = 0; i < team->t.t_nproc; ++i) {
3411 __kmp_printf(" Thread %2d: ", i);
3412 __kmp_print_structure_thread("", team->t.t_threads[i]);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003413 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003414 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3415 __kmp_printf("\n");
3416 list = list->next;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003417 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003418
Jonathan Peyton30419822017-05-12 18:01:32 +00003419 // Print out __kmp_thread_pool and __kmp_team_pool.
3420 __kmp_printf("\n------------------------------\nPools\n----------------------"
3421 "--------\n");
3422 __kmp_print_structure_thread("Thread pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003423 CCAST(kmp_info_t *, __kmp_thread_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003424 __kmp_print_structure_team("Team pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003425 CCAST(kmp_team_t *, __kmp_team_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003426 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003427
Jonathan Peyton30419822017-05-12 18:01:32 +00003428 // Free team list.
3429 while (list != NULL) {
3430 kmp_team_list_item_t *item = list;
3431 list = list->next;
3432 KMP_INTERNAL_FREE(item);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003433 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003434}
3435
3436#endif
3437
Jim Cownie5e8470a2013-09-27 10:38:44 +00003438//---------------------------------------------------------------------------
3439// Stuff for per-thread fast random number generator
3440// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003441static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003442 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3443 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3444 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3445 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3446 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3447 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3448 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3449 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3450 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3451 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3452 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003453
3454//---------------------------------------------------------------------------
3455// __kmp_get_random: Get a random number using a linear congruential method.
Jonathan Peyton30419822017-05-12 18:01:32 +00003456unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003457 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003458 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003459
Jonathan Peyton30419822017-05-12 18:01:32 +00003460 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003461
3462 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003463 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003464
3465 return r;
3466}
3467//--------------------------------------------------------
3468// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003469void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003470 unsigned seed = thread->th.th_info.ds.ds_tid;
3471
Jonathan Peyton30419822017-05-12 18:01:32 +00003472 thread->th.th_a =
3473 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3474 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3475 KA_TRACE(30,
3476 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003477}
3478
Jim Cownie5e8470a2013-09-27 10:38:44 +00003479#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003480/* reclaim array entries for root threads that are already dead, returns number
3481 * reclaimed */
3482static int __kmp_reclaim_dead_roots(void) {
3483 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003484
Jonathan Peyton30419822017-05-12 18:01:32 +00003485 for (i = 0; i < __kmp_threads_capacity; ++i) {
3486 if (KMP_UBER_GTID(i) &&
3487 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3488 !__kmp_root[i]
3489 ->r.r_active) { // AC: reclaim only roots that died in a non-active state
3490 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003491 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003492 }
3493 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003494}
3495#endif
3496
Jonathan Peyton30419822017-05-12 18:01:32 +00003497/* This function attempts to create free entries in __kmp_threads and
3498 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003499
Jonathan Peyton30419822017-05-12 18:01:32 +00003500 For Windows* OS static library, the first mechanism used is to reclaim array
3501 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003502
Jonathan Peyton30419822017-05-12 18:01:32 +00003503 On all platforms, expansion is attempted on the arrays __kmp_threads and
3504 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3505 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3506 threadprivate cache array has been created. Synchronization with
3507 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003508
Jonathan Peyton30419822017-05-12 18:01:32 +00003509 After any dead root reclamation, if the clipping value allows array expansion
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003510 to result in the generation of a total of nNeed free slots, the function does
3511 that expansion. If not, nothing is done beyond the possible initial root
3512 thread reclamation.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003513
Jonathan Peyton30419822017-05-12 18:01:32 +00003514 If the argument is negative, the behavior is undefined. */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003515static int __kmp_expand_threads(int nNeed) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003516 int added = 0;
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003517 int minimumRequiredCapacity;
3518 int newCapacity;
3519 kmp_info_t **newThreads;
3520 kmp_root_t **newRoot;
3521
3522// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
3523// resizing __kmp_threads does not need additional protection if foreign
3524// threads are present
Jim Cownie5e8470a2013-09-27 10:38:44 +00003525
Jonathan Peyton99016992015-05-26 17:32:53 +00003526#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003527 /* only for Windows static library */
3528 /* reclaim array entries for root threads that are already dead */
3529 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003530
Jonathan Peyton30419822017-05-12 18:01:32 +00003531 if (nNeed) {
3532 nNeed -= added;
3533 if (nNeed < 0)
3534 nNeed = 0;
3535 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003536#endif
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003537 if (nNeed <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003538 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003539
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003540 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3541 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3542 // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
3543 // > __kmp_max_nth in one of two ways:
3544 //
3545 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3546 // may not be reused by another thread, so we may need to increase
3547 // __kmp_threads_capacity to __kmp_max_nth + 1.
3548 //
3549 // 2) New foreign root(s) are encountered. We always register new foreign
3550 // roots. This may cause a smaller # of threads to be allocated at
3551 // subsequent parallel regions, but the worker threads hang around (and
3552 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3553 //
3554 // Anyway, that is the reason for moving the check to see if
3555 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3556 // instead of having it performed here. -BB
Jonathan Peyton30419822017-05-12 18:01:32 +00003557
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003558 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
Jonathan Peyton30419822017-05-12 18:01:32 +00003559
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003560 /* compute expansion headroom to check if we can expand */
3561 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3562 /* possible expansion too small -- give up */
3563 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003564 }
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00003565 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3566
3567 newCapacity = __kmp_threads_capacity;
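  // Grow the capacity by repeated doubling until it covers the requirement,
  // clipping at __kmp_sys_max_nth.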
3568 do {
3569 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3570 : __kmp_sys_max_nth;
3571 } while (newCapacity < minimumRequiredCapacity);
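  // __kmp_threads and __kmp_root share a single allocation: the root pointer
  // array starts immediately after the thread pointer array.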
3572 newThreads = (kmp_info_t **)__kmp_allocate(
3573 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3574 newRoot =
3575 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3576 KMP_MEMCPY(newThreads, __kmp_threads,
3577 __kmp_threads_capacity * sizeof(kmp_info_t *));
3578 KMP_MEMCPY(newRoot, __kmp_root,
3579 __kmp_threads_capacity * sizeof(kmp_root_t *));
3580
3581 kmp_info_t **temp_threads = __kmp_threads;
3582 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3583 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3584 __kmp_free(temp_threads);
3585 added += newCapacity - __kmp_threads_capacity;
3586 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3587
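  // Keep the threadprivate cache bound in sync with the new capacity: resize
  // any existing caches, otherwise just publish the larger __kmp_tp_capacity.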
3588 if (newCapacity > __kmp_tp_capacity) {
3589 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3590 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3591 __kmp_threadprivate_resize_cache(newCapacity);
3592 } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
3593 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3594 }
3595 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3596 }
3597
Jonathan Peyton30419822017-05-12 18:01:32 +00003598 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003599}
3600
Jonathan Peyton30419822017-05-12 18:01:32 +00003601/* Register the current thread as a root thread and obtain our gtid. We must
3602 have the __kmp_initz_lock held at this point. Argument TRUE only if we are the
3603 thread that calls from __kmp_do_serial_initialize() */
3604int __kmp_register_root(int initial_thread) {
3605 kmp_info_t *root_thread;
3606 kmp_root_t *root;
3607 int gtid;
3608 int capacity;
3609 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3610 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3611 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003612
Jonathan Peyton30419822017-05-12 18:01:32 +00003613 /* 2007-03-02:
3614 If the initial thread has not invoked the OpenMP RTL yet, and this thread is
3615 not an initial one, the "__kmp_all_nth >= __kmp_threads_capacity" condition
3616 does not work as expected -- it may return false (meaning there is at least
3617 one empty slot in the __kmp_threads array), but it is possible that the only
3618 free slot is #0, which is reserved for the initial thread and so cannot be
3619 used for this one. The following code works around this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003620
Jonathan Peyton30419822017-05-12 18:01:32 +00003621 However, the right solution seems to be not to reserve slot #0 for the initial
3622 thread because:
3623 (1) there is no magic in slot #0,
3624 (2) we cannot detect initial thread reliably (the first thread which does
3625 serial initialization may not be a real initial thread).
3626 */
3627 capacity = __kmp_threads_capacity;
3628 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3629 --capacity;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003630 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003631
Jonathan Peyton30419822017-05-12 18:01:32 +00003632 /* see if there are too many threads */
Jonathan Peyton1800ece2018-01-10 18:27:01 +00003633 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00003634 if (__kmp_tp_cached) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003635 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3636 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3637 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00003638 } else {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00003639 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3640 __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003641 }
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003642 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003643
3644 /* find an available thread slot */
3645 /* Don't reassign the zero slot since we need that to only be used by initial
3646 thread */
3647 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3648 gtid++)
3649 ;
3650 KA_TRACE(1,
3651 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3652 KMP_ASSERT(gtid < __kmp_threads_capacity);
3653
3654 /* update global accounting */
3655 __kmp_all_nth++;
3656 TCW_4(__kmp_nth, __kmp_nth + 1);
3657
3658 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3659 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3660 if (__kmp_adjust_gtid_mode) {
3661 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3662 if (TCR_4(__kmp_gtid_mode) != 2) {
3663 TCW_4(__kmp_gtid_mode, 2);
3664 }
3665 } else {
3666 if (TCR_4(__kmp_gtid_mode) != 1) {
3667 TCW_4(__kmp_gtid_mode, 1);
3668 }
3669 }
3670 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003671
3672#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003673 /* Adjust blocktime to zero if necessary */
3674 /* Middle initialization might not have occurred yet */
3675 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3676 if (__kmp_nth > __kmp_avail_proc) {
3677 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003678 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003679 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003680#endif /* KMP_ADJUST_BLOCKTIME */
3681
Jonathan Peyton30419822017-05-12 18:01:32 +00003682 /* setup this new hierarchy */
3683 if (!(root = __kmp_root[gtid])) {
3684 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3685 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3686 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003687
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003688#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003689 // Initialize stats as soon as possible (right after gtid assignment).
3690 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00003691 __kmp_stats_thread_ptr->startLife();
Jonathan Peyton30419822017-05-12 18:01:32 +00003692 KMP_SET_THREAD_STATE(SERIAL_REGION);
3693 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003694#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003695 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003696
Jonathan Peyton30419822017-05-12 18:01:32 +00003697 /* setup new root thread structure */
3698 if (root->r.r_uber_thread) {
3699 root_thread = root->r.r_uber_thread;
3700 } else {
3701 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3702 if (__kmp_storage_map) {
3703 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003704 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003705 root_thread->th.th_info.ds.ds_gtid = gtid;
Joachim Protze82e94a52017-11-01 10:08:30 +00003706#if OMPT_SUPPORT
3707 root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
3708#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003709 root_thread->th.th_root = root;
3710 if (__kmp_env_consistency_check) {
3711 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3712 }
3713#if USE_FAST_MEMORY
3714 __kmp_initialize_fast_memory(root_thread);
3715#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716
Jonathan Peyton30419822017-05-12 18:01:32 +00003717#if KMP_USE_BGET
3718 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3719 __kmp_initialize_bget(root_thread);
3720#endif
3721 __kmp_init_random(root_thread); // Initialize random number generator
3722 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003723
Jonathan Peyton30419822017-05-12 18:01:32 +00003724 /* setup the serial team held in reserve by the root thread */
3725 if (!root_thread->th.th_serial_team) {
3726 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3727 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3728 root_thread->th.th_serial_team =
3729 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003730#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003731 ompt_data_none, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003732#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003733#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003734 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003735#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003736 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3737 }
3738 KMP_ASSERT(root_thread->th.th_serial_team);
3739 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3740 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741
Jonathan Peyton30419822017-05-12 18:01:32 +00003742 /* drop root_thread into place */
3743 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003744
Jonathan Peyton30419822017-05-12 18:01:32 +00003745 root->r.r_root_team->t.t_threads[0] = root_thread;
3746 root->r.r_hot_team->t.t_threads[0] = root_thread;
3747 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3748 // AC: the team created in reserve, not for execution (it is unused for now).
3749 root_thread->th.th_serial_team->t.t_serialized = 0;
3750 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003751
Jonathan Peyton30419822017-05-12 18:01:32 +00003752 /* initialize the thread, get it ready to go */
3753 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3754 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003755
Jonathan Peyton30419822017-05-12 18:01:32 +00003756 /* prepare the master thread for get_gtid() */
3757 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003758
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003759#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003760 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003761#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003762
Jonathan Peyton30419822017-05-12 18:01:32 +00003763#ifdef KMP_TDATA_GTID
3764 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003765#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003766 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3767 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3768
3769 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3770 "plain=%u\n",
3771 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3772 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3773 KMP_INIT_BARRIER_STATE));
3774 { // Initialize barrier data.
3775 int b;
3776 for (b = 0; b < bs_last_barrier; ++b) {
3777 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3778#if USE_DEBUGGER
3779 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3780#endif
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00003781 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003782 }
3783 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3784 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003785
Alp Toker763b9392014-02-28 09:42:41 +00003786#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003787#if OMP_40_ENABLED
3788 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3789 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3790 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3791 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3792#endif
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003793
Jonathan Peyton30419822017-05-12 18:01:32 +00003794 if (TCR_4(__kmp_init_middle)) {
3795 __kmp_affinity_set_init_mask(gtid, TRUE);
3796 }
Alp Toker763b9392014-02-28 09:42:41 +00003797#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003798
Jonathan Peyton30419822017-05-12 18:01:32 +00003799 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003800
Joachim Protze82e94a52017-11-01 10:08:30 +00003801#if OMPT_SUPPORT
3802 if (!initial_thread && ompt_enabled.enabled) {
3803
3804 ompt_thread_t *root_thread = ompt_get_thread();
3805
3806 ompt_set_thread_state(root_thread, omp_state_overhead);
3807
3808 if (ompt_enabled.ompt_callback_thread_begin) {
3809 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3810 ompt_thread_initial, __ompt_get_thread_data_internal());
3811 }
3812 ompt_data_t *task_data;
3813 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3814 if (ompt_enabled.ompt_callback_task_create) {
3815 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3816 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3817 // initial task has nothing to return to
3818 }
3819
3820 ompt_set_thread_state(root_thread, omp_state_work_serial);
3821 }
3822#endif
3823
Jonathan Peyton30419822017-05-12 18:01:32 +00003824 KMP_MB();
3825 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003826
Jonathan Peyton30419822017-05-12 18:01:32 +00003827 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003828}
3829
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003830#if KMP_NESTED_HOT_TEAMS
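// Free the hot team cached by thread 'thr' at 'level', recursing into the hot
// teams kept by its workers at deeper levels. Returns the number of threads
// released; the master at each level is not counted since it is not freed.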
Jonathan Peyton30419822017-05-12 18:01:32 +00003831static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3832 const int max_level) {
3833 int i, n, nth;
3834 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3835 if (!hot_teams || !hot_teams[level].hot_team) {
3836 return 0;
3837 }
3838 KMP_DEBUG_ASSERT(level < max_level);
3839 kmp_team_t *team = hot_teams[level].hot_team;
3840 nth = hot_teams[level].hot_team_nth;
3841 n = nth - 1; // master is not freed
3842 if (level < max_level - 1) {
3843 for (i = 0; i < nth; ++i) {
3844 kmp_info_t *th = team->t.t_threads[i];
3845 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3846 if (i > 0 && th->th.th_hot_teams) {
3847 __kmp_free(th->th.th_hot_teams);
3848 th->th.th_hot_teams = NULL;
3849 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003850 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003851 }
3852 __kmp_free_team(root, team, NULL);
3853 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003854}
3855#endif
3856
Jonathan Peyton30419822017-05-12 18:01:32 +00003857// Resets a root thread and clears its root and hot teams.
3858// Returns the number of __kmp_threads entries directly and indirectly freed.
3859static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3860 kmp_team_t *root_team = root->r.r_root_team;
3861 kmp_team_t *hot_team = root->r.r_hot_team;
3862 int n = hot_team->t.t_nproc;
3863 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003864
Jonathan Peyton30419822017-05-12 18:01:32 +00003865 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
Jonathan Peyton30419822017-05-12 18:01:32 +00003867 root->r.r_root_team = NULL;
3868 root->r.r_hot_team = NULL;
3869 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3870 // before call to __kmp_free_team().
3871 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003872#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003873 if (__kmp_hot_teams_max_level >
3874 0) { // need to free nested hot teams and their threads if any
3875 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3876 kmp_info_t *th = hot_team->t.t_threads[i];
3877 if (__kmp_hot_teams_max_level > 1) {
3878 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3879 }
3880 if (th->th.th_hot_teams) {
3881 __kmp_free(th->th.th_hot_teams);
3882 th->th.th_hot_teams = NULL;
3883 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003884 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003885 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003886#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003887 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003888
Jonathan Peyton30419822017-05-12 18:01:32 +00003889 // Before we can reap the thread, we need to make certain that all other
3890 // threads in the teams that had this root as ancestor have stopped trying to
3891 // steal tasks.
3892 if (__kmp_tasking_mode != tskm_immediate_exec) {
3893 __kmp_wait_to_unref_task_teams();
3894 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003895
Jonathan Peyton30419822017-05-12 18:01:32 +00003896#if KMP_OS_WINDOWS
3897 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3898 KA_TRACE(
3899 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3900 "\n",
3901 (LPVOID) & (root->r.r_uber_thread->th),
3902 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3903 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3904#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003905
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003906#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00003907 if (ompt_enabled.ompt_callback_thread_end) {
3908 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3909 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
Jonathan Peyton30419822017-05-12 18:01:32 +00003910 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003911#endif
3912
Jonathan Peyton30419822017-05-12 18:01:32 +00003913 TCW_4(__kmp_nth,
3914 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
Jonathan Peytonf4392462017-07-27 20:58:41 +00003915 root->r.r_cg_nthreads--;
3916
Jonathan Peyton30419822017-05-12 18:01:32 +00003917 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003918
Jonathan Peyton30419822017-05-12 18:01:32 +00003919 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it
3920 // instead of freeing it.
3921 root->r.r_uber_thread = NULL;
3922 /* mark root as no longer in use */
3923 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003924
Jonathan Peyton30419822017-05-12 18:01:32 +00003925 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003926}
3927
Jonathan Peyton30419822017-05-12 18:01:32 +00003928void __kmp_unregister_root_current_thread(int gtid) {
3929 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3930 /* this lock should be ok, since unregister_root_current_thread is never
3931 called during an abort, only during a normal close. furthermore, if you
3932 have the forkjoin lock, you should never try to get the initz lock */
3933 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3934 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3935 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
3936 "exiting T#%d\n",
3937 gtid));
3938 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3939 return;
3940 }
3941 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00003942
Jonathan Peyton30419822017-05-12 18:01:32 +00003943 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3944 KMP_ASSERT(KMP_UBER_GTID(gtid));
3945 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3946 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003947
Jonathan Peyton30419822017-05-12 18:01:32 +00003948 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003949
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003950#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003951 kmp_info_t *thread = __kmp_threads[gtid];
3952 kmp_team_t *team = thread->th.th_team;
3953 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003954
Jonathan Peyton30419822017-05-12 18:01:32 +00003955 // we need to wait for the proxy tasks before finishing the thread
3956 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003957#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003958 // the runtime is shutting down so we won't report any events
Joachim Protze82e94a52017-11-01 10:08:30 +00003959 thread->th.ompt_thread_info.state = omp_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003960#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003961 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3962 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003963#endif
3964
Jonathan Peyton30419822017-05-12 18:01:32 +00003965 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003966
Jonathan Peyton30419822017-05-12 18:01:32 +00003967 /* free up this thread slot */
3968 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003969#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00003970 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003971#endif
3972
Jonathan Peyton30419822017-05-12 18:01:32 +00003973 KMP_MB();
3974 KC_TRACE(10,
3975 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003976
Jonathan Peyton30419822017-05-12 18:01:32 +00003977 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003978}
3979
Jonathan Peyton2321d572015-06-08 19:25:25 +00003980#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003981/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00003982 Unregisters a root thread that is not the current thread. Returns the number
3983 of __kmp_threads entries freed as a result. */
3984static int __kmp_unregister_root_other_thread(int gtid) {
3985 kmp_root_t *root = __kmp_root[gtid];
3986 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003987
Jonathan Peyton30419822017-05-12 18:01:32 +00003988 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
3989 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3990 KMP_ASSERT(KMP_UBER_GTID(gtid));
3991 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3992 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003993
Jonathan Peyton30419822017-05-12 18:01:32 +00003994 r = __kmp_reset_root(gtid, root);
3995 KC_TRACE(10,
3996 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
3997 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003998}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003999#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004000
Jim Cownie5e8470a2013-09-27 10:38:44 +00004001#if KMP_DEBUG
4002void __kmp_task_info() {
4003
Jonathan Peyton30419822017-05-12 18:01:32 +00004004 kmp_int32 gtid = __kmp_entry_gtid();
4005 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4006 kmp_info_t *this_thr = __kmp_threads[gtid];
4007 kmp_team_t *steam = this_thr->th.th_serial_team;
4008 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004009
Jonathan Peyton30419822017-05-12 18:01:32 +00004010 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
4011 "ptask=%p\n",
4012 gtid, tid, this_thr, team, this_thr->th.th_current_task,
4013 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004014}
4015#endif // KMP_DEBUG
4016
Jonathan Peyton30419822017-05-12 18:01:32 +00004017/* TODO optimize with one big memclr, take out what isn't needed, split
4018 responsibility to workers as much as possible, and delay initialization of
4019 features as much as possible */
4020static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4021 int tid, int gtid) {
4022 /* this_thr->th.th_info.ds.ds_gtid is setup in
4023 kmp_allocate_thread/create_worker.
4024 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4025 kmp_info_t *master = team->t.t_threads[0];
4026 KMP_DEBUG_ASSERT(this_thr != NULL);
4027 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4028 KMP_DEBUG_ASSERT(team);
4029 KMP_DEBUG_ASSERT(team->t.t_threads);
4030 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4031 KMP_DEBUG_ASSERT(master);
4032 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004033
Jonathan Peyton30419822017-05-12 18:01:32 +00004034 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004035
Jonathan Peyton30419822017-05-12 18:01:32 +00004036 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004037
Jonathan Peyton30419822017-05-12 18:01:32 +00004038 this_thr->th.th_info.ds.ds_tid = tid;
4039 this_thr->th.th_set_nproc = 0;
4040 if (__kmp_tasking_mode != tskm_immediate_exec)
4041 // When tasking is possible, threads are not safe to reap until they are
4042 // done tasking; this will be set when tasking code is exited in wait
4043 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4044 else // no tasking --> always safe to reap
4045 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004046#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004047 this_thr->th.th_set_proc_bind = proc_bind_default;
4048#if KMP_AFFINITY_SUPPORTED
4049 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004050#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004051#endif
4052 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004053
Jonathan Peyton30419822017-05-12 18:01:32 +00004054 /* setup the thread's cache of the team structure */
4055 this_thr->th.th_team_nproc = team->t.t_nproc;
4056 this_thr->th.th_team_master = master;
4057 this_thr->th.th_team_serialized = team->t.t_serialized;
4058 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004059
Jonathan Peyton30419822017-05-12 18:01:32 +00004060 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004061
Jonathan Peyton30419822017-05-12 18:01:32 +00004062 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4063 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004064
Jonathan Peyton30419822017-05-12 18:01:32 +00004065 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4066 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004067
Jonathan Peyton30419822017-05-12 18:01:32 +00004068 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4069 tid, gtid, this_thr, this_thr->th.th_current_task));
4070 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4071 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004072
Jonathan Peyton30419822017-05-12 18:01:32 +00004073 /* TODO no worksharing in speculative threads */
4074 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004075
Jonathan Peyton30419822017-05-12 18:01:32 +00004076 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004077
Jonathan Peyton30419822017-05-12 18:01:32 +00004078 if (!this_thr->th.th_pri_common) {
4079 this_thr->th.th_pri_common =
4080 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4081 if (__kmp_storage_map) {
4082 __kmp_print_storage_map_gtid(
4083 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4084 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004085 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004086 this_thr->th.th_pri_head = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00004087 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004088
Jonathan Peyton30419822017-05-12 18:01:32 +00004089 /* Initialize dynamic dispatch */
4090 {
4091 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4092 // Use team max_nproc since this will never change for the team.
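    // A team with t_max_nproc == 1 needs only one buffer; otherwise allocate
    // __kmp_dispatch_num_buffers so several loops can be in flight at once.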
4093 size_t disp_size =
4094 sizeof(dispatch_private_info_t) *
4095 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4096 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4097 team->t.t_max_nproc));
4098 KMP_ASSERT(dispatch);
4099 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4100 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004101
Jonathan Peyton30419822017-05-12 18:01:32 +00004102 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004103#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004104 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004105#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004106 if (!dispatch->th_disp_buffer) {
4107 dispatch->th_disp_buffer =
4108 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004109
Jonathan Peyton30419822017-05-12 18:01:32 +00004110 if (__kmp_storage_map) {
4111 __kmp_print_storage_map_gtid(
4112 gtid, &dispatch->th_disp_buffer[0],
4113 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4114 ? 1
4115 : __kmp_dispatch_num_buffers],
4116 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4117 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4118 gtid, team->t.t_id, gtid);
4119 }
4120 } else {
4121 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004122 }
4123
Jonathan Peyton30419822017-05-12 18:01:32 +00004124 dispatch->th_dispatch_pr_current = 0;
4125 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004126
Jonathan Peyton30419822017-05-12 18:01:32 +00004127 dispatch->th_deo_fcn = 0; /* ORDERED */
4128 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4129 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004130
Jonathan Peyton30419822017-05-12 18:01:32 +00004131 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004132
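  // Lazily allocate the small stack used to save th_task_state across nested
  // parallel regions; it starts with room for 4 entries.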
Jonathan Peyton30419822017-05-12 18:01:32 +00004133 if (!this_thr->th.th_task_state_memo_stack) {
4134 size_t i;
4135 this_thr->th.th_task_state_memo_stack =
4136 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4137 this_thr->th.th_task_state_top = 0;
4138 this_thr->th.th_task_state_stack_sz = 4;
4139 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4140 ++i) // zero init the stack
4141 this_thr->th.th_task_state_memo_stack[i] = 0;
4142 }
4143
4144 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4145 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4146
4147 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004148}
4149
Jonathan Peyton30419822017-05-12 18:01:32 +00004150/* allocate a new thread for the requesting team. this is only called from
4151 within a forkjoin critical section. we will first try to get an available
4152 thread from the thread pool. if none is available, we will fork a new one
4153 assuming we are able to create a new one. this should be assured, as the
4154 caller should check on this first. */
4155kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4156 int new_tid) {
4157 kmp_team_t *serial_team;
4158 kmp_info_t *new_thr;
4159 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004160
Jonathan Peyton30419822017-05-12 18:01:32 +00004161 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4162 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004163#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004164 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004165#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004166 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004167
Jonathan Peyton30419822017-05-12 18:01:32 +00004168 /* first, try to get one from the thread pool */
4169 if (__kmp_thread_pool) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004170
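    // Take the first thread from the pool list (linked through th_next_pool)
    // and clear the cached insertion point if it pointed at the removed thread.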
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004171 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00004172 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4173 if (new_thr == __kmp_thread_pool_insert_pt) {
4174 __kmp_thread_pool_insert_pt = NULL;
4175 }
4176 TCW_4(new_thr->th.th_in_pool, FALSE);
4177 // Don't touch th_active_in_pool or th_active.
4178 // The worker thread adjusts those flags as it sleeps/awakens.
4179 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004180
Jonathan Peyton30419822017-05-12 18:01:32 +00004181 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4182 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4183 KMP_ASSERT(!new_thr->th.th_team);
4184 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4185 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004186
Jonathan Peyton30419822017-05-12 18:01:32 +00004187 /* setup the thread structure */
4188 __kmp_initialize_info(new_thr, team, new_tid,
4189 new_thr->th.th_info.ds.ds_gtid);
4190 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004191
Jonathan Peyton30419822017-05-12 18:01:32 +00004192 TCW_4(__kmp_nth, __kmp_nth + 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00004193 root->r.r_cg_nthreads++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194
Jonathan Peyton30419822017-05-12 18:01:32 +00004195 new_thr->th.th_task_state = 0;
4196 new_thr->th.th_task_state_top = 0;
4197 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004198
Jim Cownie5e8470a2013-09-27 10:38:44 +00004199#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004200 /* Adjust blocktime back to zero if necessary */
4201 /* Middle initialization might not have occurred yet */
4202 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4203 if (__kmp_nth > __kmp_avail_proc) {
4204 __kmp_zero_bt = TRUE;
4205 }
4206 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004207#endif /* KMP_ADJUST_BLOCKTIME */
4208
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004209#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004210 // If the thread entered the pool via __kmp_free_thread, its wait_flag should
4211 // not equal KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004212 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004213 kmp_balign_t *balign = new_thr->th.th_bar;
4214 for (b = 0; b < bs_last_barrier; ++b)
4215 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004216#endif
4217
Jonathan Peyton30419822017-05-12 18:01:32 +00004218 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4219 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004220
Jim Cownie5e8470a2013-09-27 10:38:44 +00004221 KMP_MB();
4222 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004223 }
4224
4225 /* no thread available in the pool, so fork a new one */
4226 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4227 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4228
4229#if KMP_USE_MONITOR
4230 // If this is the first worker thread the RTL is creating, then also
4231 // launch the monitor thread. We try to do this as early as possible.
4232 if (!TCR_4(__kmp_init_monitor)) {
4233 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4234 if (!TCR_4(__kmp_init_monitor)) {
4235 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4236 TCW_4(__kmp_init_monitor, 1);
4237 __kmp_create_monitor(&__kmp_monitor);
4238 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4239#if KMP_OS_WINDOWS
4240 // AC: wait until monitor has started. This is a fix for CQ232808.
4241 // The reason is that if the library is loaded/unloaded in a loop with
4242 // small (parallel) work in between, then there is high probability that
4243 // monitor thread started after the library shutdown. At shutdown it is
4244 // too late to cope with the problem, because when the master is in
4245 // DllMain (process detach) the monitor has no chances to start (it is
4246 // blocked), and master has no means to inform the monitor that the
4247 // library has gone, because all the memory which the monitor can access
4248 // is going to be released/reset.
4249 while (TCR_4(__kmp_init_monitor) < 2) {
4250 KMP_YIELD(TRUE);
4251 }
4252 KF_TRACE(10, ("after monitor thread has started\n"));
4253#endif
4254 }
4255 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4256 }
4257#endif
4258
4259 KMP_MB();
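  // Find the first free gtid slot; slot 0 is reserved for the initial root.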
4260 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4261 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4262 }
4263
4264 /* allocate space for it. */
4265 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4266
4267 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4268
4269 if (__kmp_storage_map) {
4270 __kmp_print_thread_storage_map(new_thr, new_gtid);
4271 }
4272
4273 // add the reserve serialized team, initialized from the team's master thread
4274 {
4275 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4276 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4277 new_thr->th.th_serial_team = serial_team =
4278 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4279#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004280 ompt_data_none, // root parallel id
Jonathan Peyton30419822017-05-12 18:01:32 +00004281#endif
4282#if OMP_40_ENABLED
4283 proc_bind_default,
4284#endif
4285 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4286 }
4287 KMP_ASSERT(serial_team);
4288 serial_team->t.t_serialized = 0; // AC: the team is created in reserve, not
4289 // for execution (it is unused for now).
4290 serial_team->t.t_threads[0] = new_thr;
4291 KF_TRACE(10,
4292 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4293 new_thr));
4294
4295 /* setup the thread structures */
4296 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4297
4298#if USE_FAST_MEMORY
4299 __kmp_initialize_fast_memory(new_thr);
4300#endif /* USE_FAST_MEMORY */
4301
4302#if KMP_USE_BGET
4303 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4304 __kmp_initialize_bget(new_thr);
4305#endif
4306
4307 __kmp_init_random(new_thr); // Initialize random number generator
4308
4309 /* Initialize these only once when thread is grabbed for a team allocation */
4310 KA_TRACE(20,
4311 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4312 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4313
4314 int b;
4315 kmp_balign_t *balign = new_thr->th.th_bar;
4316 for (b = 0; b < bs_last_barrier; ++b) {
4317 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4318 balign[b].bb.team = NULL;
4319 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4320 balign[b].bb.use_oncore_barrier = 0;
4321 }
4322
4323 new_thr->th.th_spin_here = FALSE;
4324 new_thr->th.th_next_waiting = 0;
Jonathan Peytona764af62018-07-19 19:17:00 +00004325#if KMP_OS_UNIX
4326 new_thr->th.th_blocking = false;
4327#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004328
4329#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4330 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4331 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4332 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4333 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4334#endif
4335
4336 TCW_4(new_thr->th.th_in_pool, FALSE);
4337 new_thr->th.th_active_in_pool = FALSE;
4338 TCW_4(new_thr->th.th_active, TRUE);
4339
4340 /* adjust the global counters */
4341 __kmp_all_nth++;
4342 __kmp_nth++;
4343
Jonathan Peytonf4392462017-07-27 20:58:41 +00004344 root->r.r_cg_nthreads++;
4345
Jonathan Peyton30419822017-05-12 18:01:32 +00004346 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4347 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4348 if (__kmp_adjust_gtid_mode) {
4349 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4350 if (TCR_4(__kmp_gtid_mode) != 2) {
4351 TCW_4(__kmp_gtid_mode, 2);
4352 }
4353 } else {
4354 if (TCR_4(__kmp_gtid_mode) != 1) {
4355 TCW_4(__kmp_gtid_mode, 1);
4356 }
4357 }
4358 }
4359
4360#ifdef KMP_ADJUST_BLOCKTIME
4361 /* Adjust blocktime back to zero if necessary */
4362 /* Middle initialization might not have occurred yet */
4363 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4364 if (__kmp_nth > __kmp_avail_proc) {
4365 __kmp_zero_bt = TRUE;
4366 }
4367 }
4368#endif /* KMP_ADJUST_BLOCKTIME */
4369
4370 /* actually fork it and create the new worker thread */
4371 KF_TRACE(
4372 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4373 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4374 KF_TRACE(10,
4375 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4376
4377 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4378 new_gtid));
4379 KMP_MB();
4380 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004381}
4382
Jonathan Peyton30419822017-05-12 18:01:32 +00004383/* Reinitialize team for reuse.
4384 The hot team code calls this routine at every fork barrier, so the EPCC
4385 barrier tests are extremely sensitive to changes in it, esp. writes to the team
4386 struct, which cause a cache invalidation in all threads.
4387 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4388static void __kmp_reinitialize_team(kmp_team_t *team,
4389 kmp_internal_control_t *new_icvs,
4390 ident_t *loc) {
4391 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4392 team->t.t_threads[0], team));
4393 KMP_DEBUG_ASSERT(team && new_icvs);
4394 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4395 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004396
Jonathan Peyton30419822017-05-12 18:01:32 +00004397 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004398 // Copy ICVs to the master thread's implicit taskdata
4399 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4400 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004401
Jonathan Peyton30419822017-05-12 18:01:32 +00004402 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4403 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004404}
4405
Jonathan Peyton30419822017-05-12 18:01:32 +00004406/* Initialize the team data structure.
4407 This assumes the t_threads and t_max_nproc are already set.
4408 Also, we don't touch the arguments */
4409static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4410 kmp_internal_control_t *new_icvs,
4411 ident_t *loc) {
4412 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004413
Jonathan Peyton30419822017-05-12 18:01:32 +00004414 /* verify */
4415 KMP_DEBUG_ASSERT(team);
4416 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4417 KMP_DEBUG_ASSERT(team->t.t_threads);
4418 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004419
Jonathan Peyton30419822017-05-12 18:01:32 +00004420 team->t.t_master_tid = 0; /* not needed */
4421 /* team->t.t_master_bar; not needed */
4422 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4423 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004424
Jonathan Peyton30419822017-05-12 18:01:32 +00004425 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4426 team->t.t_next_pool = NULL;
4427 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4428 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004429
Jonathan Peyton30419822017-05-12 18:01:32 +00004430 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4431 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004432
Jonathan Peyton30419822017-05-12 18:01:32 +00004433 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004434 team->t.t_sched.sched = new_icvs->sched.sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004435
4436#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004437 team->t.t_fp_control_saved = FALSE; /* not needed */
4438 team->t.t_x87_fpu_control_word = 0; /* not needed */
4439 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004440#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4441
Jonathan Peyton30419822017-05-12 18:01:32 +00004442 team->t.t_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004443
Jonathan Peyton30419822017-05-12 18:01:32 +00004444 team->t.t_ordered.dt.t_value = 0;
4445 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004446
Jonathan Peyton30419822017-05-12 18:01:32 +00004447 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004448
4449#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004450 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004451#endif
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004452#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00004453 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00004454#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004455
Jonathan Peyton30419822017-05-12 18:01:32 +00004456 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004457
Jonathan Peyton30419822017-05-12 18:01:32 +00004458 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004459
Jonathan Peyton30419822017-05-12 18:01:32 +00004460 KMP_MB();
4461 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004462}
4463
Alp Toker98758b02014-03-02 04:12:06 +00004464#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004465/* Sets full mask for thread and returns old mask, no changes to structures. */
4466static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004467__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4468 if (KMP_AFFINITY_CAPABLE()) {
4469 int status;
4470 if (old_mask != NULL) {
4471 status = __kmp_get_system_affinity(old_mask, TRUE);
4472 int error = errno;
4473 if (status != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00004474 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4475 __kmp_msg_null);
Jonathan Peyton30419822017-05-12 18:01:32 +00004476 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004477 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004478 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4479 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004480}
4481#endif
4482
Alp Toker98758b02014-03-02 04:12:06 +00004483#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004484
Jim Cownie5e8470a2013-09-27 10:38:44 +00004485// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4486// It calculates the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004487// thread's partition, and binds each worker to a place in its partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004488// The master thread's partition should already include its current binding.
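// For example (illustrative values, traced from the code below): under
// proc_bind_close, a team of 4 threads whose master is bound to place p0 of
// partition [p0,p3] keeps [p0,p3] for every worker and assigns workers 1..3
// to the consecutive places p1, p2 and p3.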
Jonathan Peyton30419822017-05-12 18:01:32 +00004489static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4490  // Copy the master thread's place partition to the team struct
4491 kmp_info_t *master_th = team->t.t_threads[0];
4492 KMP_DEBUG_ASSERT(master_th != NULL);
4493 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4494 int first_place = master_th->th.th_first_place;
4495 int last_place = master_th->th.th_last_place;
4496 int masters_place = master_th->th.th_current_place;
4497 team->t.t_first_place = first_place;
4498 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004499
Jonathan Peyton30419822017-05-12 18:01:32 +00004500 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4501 "bound to place %d partition = [%d,%d]\n",
4502 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4503 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004504
Jonathan Peyton30419822017-05-12 18:01:32 +00004505 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004506
Jonathan Peyton30419822017-05-12 18:01:32 +00004507 case proc_bind_default:
4508 // serial teams might have the proc_bind policy set to proc_bind_default. It
4509    // doesn't matter, as we don't rebind the master thread for any proc_bind policy
4510 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4511 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004512
Jonathan Peyton30419822017-05-12 18:01:32 +00004513 case proc_bind_master: {
4514 int f;
4515 int n_th = team->t.t_nproc;
4516 for (f = 1; f < n_th; f++) {
4517 kmp_info_t *th = team->t.t_threads[f];
4518 KMP_DEBUG_ASSERT(th != NULL);
4519 th->th.th_first_place = first_place;
4520 th->th.th_last_place = last_place;
4521 th->th.th_new_place = masters_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004522
Jonathan Peyton30419822017-05-12 18:01:32 +00004523 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4524 "partition = [%d,%d]\n",
4525 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4526 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004527 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004528 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004529
Jonathan Peyton30419822017-05-12 18:01:32 +00004530 case proc_bind_close: {
4531 int f;
4532 int n_th = team->t.t_nproc;
4533 int n_places;
4534 if (first_place <= last_place) {
4535 n_places = last_place - first_place + 1;
4536 } else {
4537 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4538 }
4539 if (n_th <= n_places) {
4540 int place = masters_place;
4541 for (f = 1; f < n_th; f++) {
4542 kmp_info_t *th = team->t.t_threads[f];
4543 KMP_DEBUG_ASSERT(th != NULL);
4544
4545 if (place == last_place) {
4546 place = first_place;
4547 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4548 place = 0;
4549 } else {
4550 place++;
4551 }
4552 th->th.th_first_place = first_place;
4553 th->th.th_last_place = last_place;
4554 th->th.th_new_place = place;
4555
4556 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4557 "partition = [%d,%d]\n",
4558 __kmp_gtid_from_thread(team->t.t_threads[f]),
4559 team->t.t_id, f, place, first_place, last_place));
4560 }
4561 } else {
4562 int S, rem, gap, s_count;
4563 S = n_th / n_places;
4564 s_count = 0;
4565 rem = n_th - (S * n_places);
4566 gap = rem > 0 ? n_places / rem : n_places;
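      // Worked example (illustrative values): with n_th = 5 threads and
      // n_places = 3 places, S = 1, rem = 2 and gap = 1, so the loop below
      // leaves two places holding 2 threads each and one place holding 1
      // thread, starting from the master's place.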
4567 int place = masters_place;
4568 int gap_ct = gap;
4569 for (f = 0; f < n_th; f++) {
4570 kmp_info_t *th = team->t.t_threads[f];
4571 KMP_DEBUG_ASSERT(th != NULL);
4572
4573 th->th.th_first_place = first_place;
4574 th->th.th_last_place = last_place;
4575 th->th.th_new_place = place;
4576 s_count++;
4577
4578 if ((s_count == S) && rem && (gap_ct == gap)) {
4579          // do nothing; add an extra thread to this place on next iteration
4580 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4581 // we added an extra thread to this place; move to next place
4582 if (place == last_place) {
4583 place = first_place;
4584 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4585 place = 0;
4586 } else {
4587 place++;
4588 }
4589 s_count = 0;
4590 gap_ct = 1;
4591 rem--;
4592 } else if (s_count == S) { // place full; don't add extra
4593 if (place == last_place) {
4594 place = first_place;
4595 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4596 place = 0;
4597 } else {
4598 place++;
4599 }
4600 gap_ct++;
4601 s_count = 0;
4602 }
4603
4604 KA_TRACE(100,
4605 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4606 "partition = [%d,%d]\n",
4607 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4608 th->th.th_new_place, first_place, last_place));
4609 }
4610 KMP_DEBUG_ASSERT(place == masters_place);
4611 }
4612 } break;
4613
4614 case proc_bind_spread: {
4615 int f;
4616 int n_th = team->t.t_nproc;
4617 int n_places;
4618 int thidx;
4619 if (first_place <= last_place) {
4620 n_places = last_place - first_place + 1;
4621 } else {
4622 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4623 }
4624 if (n_th <= n_places) {
Paul Osmialowskia0162792017-08-10 23:04:11 +00004625 int place = -1;
Jonathan Peyton30419822017-05-12 18:01:32 +00004626
Paul Osmialowskia0162792017-08-10 23:04:11 +00004627 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4628 int S = n_places / n_th;
4629 int s_count, rem, gap, gap_ct;
4630
4631 place = masters_place;
4632 rem = n_places - n_th * S;
4633 gap = rem ? n_th / rem : 1;
4634 gap_ct = gap;
4635 thidx = n_th;
4636 if (update_master_only == 1)
4637 thidx = 1;
4638 for (f = 0; f < thidx; f++) {
4639 kmp_info_t *th = team->t.t_threads[f];
4640 KMP_DEBUG_ASSERT(th != NULL);
4641
4642 th->th.th_first_place = place;
4643 th->th.th_new_place = place;
4644 s_count = 1;
4645 while (s_count < S) {
4646 if (place == last_place) {
4647 place = first_place;
4648 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4649 place = 0;
4650 } else {
4651 place++;
4652 }
4653 s_count++;
4654 }
4655 if (rem && (gap_ct == gap)) {
4656 if (place == last_place) {
4657 place = first_place;
4658 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4659 place = 0;
4660 } else {
4661 place++;
4662 }
4663 rem--;
4664 gap_ct = 0;
4665 }
4666 th->th.th_last_place = place;
4667 gap_ct++;
4668
Jonathan Peyton30419822017-05-12 18:01:32 +00004669 if (place == last_place) {
4670 place = first_place;
4671 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4672 place = 0;
4673 } else {
4674 place++;
4675 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004676
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004677 KA_TRACE(100,
4678 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4679 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4680 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4681 f, th->th.th_new_place, th->th.th_first_place,
4682 th->th.th_last_place, __kmp_affinity_num_masks));
Jonathan Peyton30419822017-05-12 18:01:32 +00004683 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004684 } else {
4685 /* Having uniform space of available computation places I can create
4686 T partitions of round(P/T) size and put threads into the first
4687 place of each partition. */
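        // Worked example (illustrative values): with n_places = 8 and
        // n_th = 4, the spacing computed below is (8 + 1) / 4 = 2.25, so
        // (assuming masters_place == 0) the threads get the partitions
        // [0,1], [2,3], [4,5] and [6,7], each bound to the first place of
        // its partition.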
4688 double current = static_cast<double>(masters_place);
4689 double spacing =
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004690 (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004691 int first, last;
4692 kmp_info_t *th;
4693
4694 thidx = n_th + 1;
4695 if (update_master_only == 1)
4696 thidx = 1;
4697 for (f = 0; f < thidx; f++) {
4698 first = static_cast<int>(current);
4699 last = static_cast<int>(current + spacing) - 1;
4700 KMP_DEBUG_ASSERT(last >= first);
4701 if (first >= n_places) {
4702 if (masters_place) {
4703 first -= n_places;
4704 last -= n_places;
4705 if (first == (masters_place + 1)) {
4706 KMP_DEBUG_ASSERT(f == n_th);
4707 first--;
4708 }
4709 if (last == masters_place) {
4710 KMP_DEBUG_ASSERT(f == (n_th - 1));
4711 last--;
4712 }
4713 } else {
4714 KMP_DEBUG_ASSERT(f == n_th);
4715 first = 0;
4716 last = 0;
4717 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004718 }
Paul Osmialowskia0162792017-08-10 23:04:11 +00004719 if (last >= n_places) {
4720 last = (n_places - 1);
4721 }
4722 place = first;
4723 current += spacing;
4724 if (f < n_th) {
4725 KMP_DEBUG_ASSERT(0 <= first);
4726 KMP_DEBUG_ASSERT(n_places > first);
4727 KMP_DEBUG_ASSERT(0 <= last);
4728 KMP_DEBUG_ASSERT(n_places > last);
4729 KMP_DEBUG_ASSERT(last_place >= first_place);
4730 th = team->t.t_threads[f];
4731 KMP_DEBUG_ASSERT(th);
4732 th->th.th_first_place = first;
4733 th->th.th_new_place = place;
4734 th->th.th_last_place = last;
Jonathan Peyton30419822017-05-12 18:01:32 +00004735
Jonathan Peyton94a114f2017-10-20 19:30:57 +00004736 KA_TRACE(100,
4737 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4738 "partition = [%d,%d], spacing = %.4f\n",
4739 __kmp_gtid_from_thread(team->t.t_threads[f]),
4740 team->t.t_id, f, th->th.th_new_place,
4741 th->th.th_first_place, th->th.th_last_place, spacing));
Paul Osmialowskia0162792017-08-10 23:04:11 +00004742 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004743 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004744 }
4745 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4746 } else {
4747 int S, rem, gap, s_count;
4748 S = n_th / n_places;
4749 s_count = 0;
4750 rem = n_th - (S * n_places);
4751 gap = rem > 0 ? n_places / rem : n_places;
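      // Note: this branch reuses the S/rem/gap distribution of the
      // proc_bind_close case above, but narrows each thread's partition to
      // the single place it is assigned to
      // (th_first_place == th_last_place == th_new_place).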
4752 int place = masters_place;
4753 int gap_ct = gap;
4754 thidx = n_th;
4755 if (update_master_only == 1)
4756 thidx = 1;
4757 for (f = 0; f < thidx; f++) {
4758 kmp_info_t *th = team->t.t_threads[f];
4759 KMP_DEBUG_ASSERT(th != NULL);
4760
4761 th->th.th_first_place = place;
4762 th->th.th_last_place = place;
4763 th->th.th_new_place = place;
4764 s_count++;
4765
4766 if ((s_count == S) && rem && (gap_ct == gap)) {
4767          // do nothing; add an extra thread to this place on next iteration
4768 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4769 // we added an extra thread to this place; move on to next place
4770 if (place == last_place) {
4771 place = first_place;
4772 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4773 place = 0;
4774 } else {
4775 place++;
4776 }
4777 s_count = 0;
4778 gap_ct = 1;
4779 rem--;
4780 } else if (s_count == S) { // place is full; don't add extra thread
4781 if (place == last_place) {
4782 place = first_place;
4783 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4784 place = 0;
4785 } else {
4786 place++;
4787 }
4788 gap_ct++;
4789 s_count = 0;
4790 }
4791
4792 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4793 "partition = [%d,%d]\n",
4794 __kmp_gtid_from_thread(team->t.t_threads[f]),
4795 team->t.t_id, f, th->th.th_new_place,
4796 th->th.th_first_place, th->th.th_last_place));
4797 }
4798 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4799 }
4800 } break;
4801
4802 default:
4803 break;
4804 }
4805
4806 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004807}
4808
Alp Toker98758b02014-03-02 04:12:06 +00004809#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004810
Jonathan Peyton30419822017-05-12 18:01:32 +00004811/* allocate a new team data structure to use; take one off of the free pool if
4812 available */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004813kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004814__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004815#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00004816 ompt_data_t ompt_parallel_data,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004817#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004818#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004819 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004820#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004821 kmp_internal_control_t *new_icvs,
4822 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4823 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4824 int f;
4825 kmp_team_t *team;
4826 int use_hot_team = !root->r.r_active;
4827 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004828
Jonathan Peyton30419822017-05-12 18:01:32 +00004829 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4830 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4831 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4832 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004833
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004834#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004835 kmp_hot_team_ptr_t *hot_teams;
4836 if (master) {
4837 team = master->th.th_team;
4838 level = team->t.t_active_level;
4839 if (master->th.th_teams_microtask) { // in teams construct?
4840 if (master->th.th_teams_size.nteams > 1 &&
4841 ( // #teams > 1
4842 team->t.t_pkfn ==
4843 (microtask_t)__kmp_teams_master || // inner fork of the teams
4844 master->th.th_teams_level <
4845 team->t.t_level)) { // or nested parallel inside the teams
4846        ++level; // do not increment if #teams==1, or for outer fork of the teams;
4847 // increment otherwise
4848 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004849 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004850 hot_teams = master->th.th_hot_teams;
4851 if (level < __kmp_hot_teams_max_level && hot_teams &&
4852 hot_teams[level]
4853 .hot_team) { // hot team has already been allocated for given level
4854 use_hot_team = 1;
4855 } else {
4856 use_hot_team = 0;
4857 }
4858 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004859#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004860 // Optimization to use a "hot" team
4861 if (use_hot_team && new_nproc > 1) {
4862 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004863#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004864 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004865#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004866 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004867#endif
4868#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004869 if (__kmp_tasking_mode != tskm_immediate_exec) {
4870 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4871 "task_team[1] = %p before reinit\n",
4872 team->t.t_task_team[0], team->t.t_task_team[1]));
4873 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004874#endif
4875
Jonathan Peyton30419822017-05-12 18:01:32 +00004876 // Has the number of threads changed?
4877 /* Let's assume the most common case is that the number of threads is
4878 unchanged, and put that case first. */
4879 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4880 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4881 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004882 // team size was already reduced, so we check the special flag
Jonathan Peyton30419822017-05-12 18:01:32 +00004883 if (team->t.t_size_changed == -1) {
4884 team->t.t_size_changed = 1;
4885 } else {
4886 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4887 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004888
Jonathan Peyton30419822017-05-12 18:01:32 +00004889 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4890 kmp_r_sched_t new_sched = new_icvs->sched;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004891 // set master's schedule as new run-time schedule
4892 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004893
Jonathan Peyton30419822017-05-12 18:01:32 +00004894 __kmp_reinitialize_team(team, new_icvs,
4895 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004896
Jonathan Peyton30419822017-05-12 18:01:32 +00004897 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4898 team->t.t_threads[0], team));
4899 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004900
4901#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004902#if KMP_AFFINITY_SUPPORTED
4903 if ((team->t.t_size_changed == 0) &&
4904 (team->t.t_proc_bind == new_proc_bind)) {
4905 if (new_proc_bind == proc_bind_spread) {
4906 __kmp_partition_places(
4907 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004908 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004909 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
4910 "proc_bind = %d, partition = [%d,%d]\n",
4911 team->t.t_id, new_proc_bind, team->t.t_first_place,
4912 team->t.t_last_place));
4913 } else {
4914 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4915 __kmp_partition_places(team);
4916 }
4917#else
4918 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4919#endif /* KMP_AFFINITY_SUPPORTED */
4920#endif /* OMP_40_ENABLED */
4921 } else if (team->t.t_nproc > new_nproc) {
4922 KA_TRACE(20,
4923 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
4924 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004925
Jonathan Peyton30419822017-05-12 18:01:32 +00004926 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004927#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004928 if (__kmp_hot_teams_mode == 0) {
4929      // AC: the saved number of threads should correspond to the team's value in
4930      // this mode; it can be bigger in mode 1, when the hot team has threads in reserve
4931 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4932 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004933#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004934 /* release the extra threads we don't need any more */
4935 for (f = new_nproc; f < team->t.t_nproc; f++) {
4936 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4937 if (__kmp_tasking_mode != tskm_immediate_exec) {
4938 // When decreasing team size, threads no longer in the team should
4939 // unref task team.
4940 team->t.t_threads[f]->th.th_task_team = NULL;
4941 }
4942 __kmp_free_thread(team->t.t_threads[f]);
4943 team->t.t_threads[f] = NULL;
4944 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004945#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004946 } // (__kmp_hot_teams_mode == 0)
4947 else {
4948 // When keeping extra threads in team, switch threads to wait on own
4949 // b_go flag
4950 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4951 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4952 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4953 for (int b = 0; b < bs_last_barrier; ++b) {
4954 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4955 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004956 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004957 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4958 }
4959 }
4960 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004961#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004962 team->t.t_nproc = new_nproc;
4963 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
Jonathan Peytonba55a7b2017-11-29 22:47:52 +00004964 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
Jonathan Peyton30419822017-05-12 18:01:32 +00004965 __kmp_reinitialize_team(team, new_icvs,
4966 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004967
Jonathan Peyton30419822017-05-12 18:01:32 +00004968 /* update the remaining threads */
4969 for (f = 0; f < new_nproc; ++f) {
4970 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4971 }
4972 // restore the current task state of the master thread: should be the
4973 // implicit task
4974 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4975 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004976
Jonathan Peyton30419822017-05-12 18:01:32 +00004977 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004978
4979#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004980 for (f = 0; f < team->t.t_nproc; f++) {
4981 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4982 team->t.t_threads[f]->th.th_team_nproc ==
4983 team->t.t_nproc);
4984 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004985#endif
4986
4987#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004988 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4989#if KMP_AFFINITY_SUPPORTED
4990 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004991#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004992#endif
4993 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004994#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00004995 kmp_affin_mask_t *old_mask;
4996 if (KMP_AFFINITY_CAPABLE()) {
4997 KMP_CPU_ALLOC(old_mask);
4998 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004999#endif
5000
Jonathan Peyton30419822017-05-12 18:01:32 +00005001 KA_TRACE(20,
5002 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
5003 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005004
Jonathan Peyton30419822017-05-12 18:01:32 +00005005 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005006
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005007#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005008 int avail_threads = hot_teams[level].hot_team_nth;
5009 if (new_nproc < avail_threads)
5010 avail_threads = new_nproc;
5011 kmp_info_t **other_threads = team->t.t_threads;
5012 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5013 // Adjust barrier data of reserved threads (if any) of the team
5014 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005015 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00005016 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5017 for (b = 0; b < bs_last_barrier; ++b) {
5018 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5019 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005020#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00005021 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00005022#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005023 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005024 }
5025 if (hot_teams[level].hot_team_nth >= new_nproc) {
5026 // we have all needed threads in reserve, no need to allocate any
5027      // this is only possible in mode 1; we cannot have reserved threads in mode 0
5028 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5029 team->t.t_nproc = new_nproc; // just get reserved threads involved
5030 } else {
5031 // we may have some threads in reserve, but not enough
5032 team->t.t_nproc =
5033 hot_teams[level]
5034 .hot_team_nth; // get reserved threads involved if any
5035 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
5036#endif // KMP_NESTED_HOT_TEAMS
5037 if (team->t.t_max_nproc < new_nproc) {
5038 /* reallocate larger arrays */
5039 __kmp_reallocate_team_arrays(team, new_nproc);
5040 __kmp_reinitialize_team(team, new_icvs, NULL);
5041 }
5042
5043#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5044 /* Temporarily set full mask for master thread before creation of
5045         workers. The reason is that workers inherit the affinity from the master,
5046         so if a lot of workers are created quickly on a single core, they
5047 don't get a chance to set their own affinity for a long time. */
5048 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5049#endif
5050
5051 /* allocate new threads for the hot team */
5052 for (f = team->t.t_nproc; f < new_nproc; f++) {
5053 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5054 KMP_DEBUG_ASSERT(new_worker);
5055 team->t.t_threads[f] = new_worker;
5056
5057 KA_TRACE(20,
5058 ("__kmp_allocate_team: team %d init T#%d arrived: "
5059 "join=%llu, plain=%llu\n",
5060 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5061 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5062 team->t.t_bar[bs_plain_barrier].b_arrived));
5063
5064 { // Initialize barrier data for new threads.
5065 int b;
5066 kmp_balign_t *balign = new_worker->th.th_bar;
5067 for (b = 0; b < bs_last_barrier; ++b) {
5068 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5069 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5070 KMP_BARRIER_PARENT_FLAG);
5071#if USE_DEBUGGER
5072 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5073#endif
5074 }
5075 }
5076 }
5077
5078#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5079 if (KMP_AFFINITY_CAPABLE()) {
5080 /* Restore initial master thread's affinity mask */
5081 __kmp_set_system_affinity(old_mask, TRUE);
5082 KMP_CPU_FREE(old_mask);
5083 }
5084#endif
5085#if KMP_NESTED_HOT_TEAMS
5086 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
5087#endif // KMP_NESTED_HOT_TEAMS
5088      /* make sure everyone is synchronized */
5089 int old_nproc = team->t.t_nproc; // save old value and use to update only
5090 // new threads below
5091 __kmp_initialize_team(team, new_nproc, new_icvs,
5092 root->r.r_uber_thread->th.th_ident);
5093
5094 /* reinitialize the threads */
5095 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5096 for (f = 0; f < team->t.t_nproc; ++f)
5097 __kmp_initialize_info(team->t.t_threads[f], team, f,
5098 __kmp_gtid_from_tid(f, team));
5099 if (level) { // set th_task_state for new threads in nested hot team
5100 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5101 // only need to set the th_task_state for the new threads. th_task_state
5102 // for master thread will not be accurate until after this in
5103 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5104 // correct value.
5105 for (f = old_nproc; f < team->t.t_nproc; ++f)
5106 team->t.t_threads[f]->th.th_task_state =
5107 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5108 } else { // set th_task_state for new threads in non-nested hot team
5109 int old_state =
5110 team->t.t_threads[0]->th.th_task_state; // copy master's state
5111 for (f = old_nproc; f < team->t.t_nproc; ++f)
5112 team->t.t_threads[f]->th.th_task_state = old_state;
5113 }
5114
5115#ifdef KMP_DEBUG
5116 for (f = 0; f < team->t.t_nproc; ++f) {
5117 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5118 team->t.t_threads[f]->th.th_team_nproc ==
5119 team->t.t_nproc);
5120 }
5121#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005122
5123#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005124 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5125#if KMP_AFFINITY_SUPPORTED
5126 __kmp_partition_places(team);
5127#endif
5128#endif
5129 } // Check changes in number of threads
5130
5131#if OMP_40_ENABLED
5132 kmp_info_t *master = team->t.t_threads[0];
5133 if (master->th.th_teams_microtask) {
5134 for (f = 1; f < new_nproc; ++f) {
5135 // propagate teams construct specific info to workers
5136 kmp_info_t *thr = team->t.t_threads[f];
5137 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5138 thr->th.th_teams_level = master->th.th_teams_level;
5139 thr->th.th_teams_size = master->th.th_teams_size;
5140 }
5141 }
5142#endif /* OMP_40_ENABLED */
5143#if KMP_NESTED_HOT_TEAMS
5144 if (level) {
5145 // Sync barrier state for nested hot teams, not needed for outermost hot
5146 // team.
5147 for (f = 1; f < new_nproc; ++f) {
5148 kmp_info_t *thr = team->t.t_threads[f];
5149 int b;
5150 kmp_balign_t *balign = thr->th.th_bar;
5151 for (b = 0; b < bs_last_barrier; ++b) {
5152 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5153 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5154#if USE_DEBUGGER
5155 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5156#endif
5157 }
5158 }
5159 }
5160#endif // KMP_NESTED_HOT_TEAMS
5161
5162 /* reallocate space for arguments if necessary */
5163 __kmp_alloc_argv_entries(argc, team, TRUE);
5164 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5165 // The hot team re-uses the previous task team,
5166 // if untouched during the previous release->gather phase.
5167
5168 KF_TRACE(10, (" hot_team = %p\n", team));
5169
5170#if KMP_DEBUG
5171 if (__kmp_tasking_mode != tskm_immediate_exec) {
5172 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5173 "task_team[1] = %p after reinit\n",
5174 team->t.t_task_team[0], team->t.t_task_team[1]));
5175 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005176#endif
5177
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005178#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005179 __ompt_team_assign_id(team, ompt_parallel_data);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005180#endif
5181
Jim Cownie5e8470a2013-09-27 10:38:44 +00005182 KMP_MB();
5183
Jim Cownie5e8470a2013-09-27 10:38:44 +00005184 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005185 }
5186
5187 /* next, let's try to take one from the team pool */
5188 KMP_MB();
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005189 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005190 /* TODO: consider resizing undersized teams instead of reaping them, now
5191 that we have a resizing mechanism */
5192 if (team->t.t_max_nproc >= max_nproc) {
5193 /* take this team from the team pool */
5194 __kmp_team_pool = team->t.t_next_pool;
5195
5196 /* setup the team for fresh use */
5197 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5198
5199 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5200 "task_team[1] %p to NULL\n",
5201 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5202 team->t.t_task_team[0] = NULL;
5203 team->t.t_task_team[1] = NULL;
5204
5205 /* reallocate space for arguments if necessary */
5206 __kmp_alloc_argv_entries(argc, team, TRUE);
5207 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5208
5209 KA_TRACE(
5210 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5211 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5212 { // Initialize barrier data.
5213 int b;
5214 for (b = 0; b < bs_last_barrier; ++b) {
5215 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5216#if USE_DEBUGGER
5217 team->t.t_bar[b].b_master_arrived = 0;
5218 team->t.t_bar[b].b_team_arrived = 0;
5219#endif
5220 }
5221 }
5222
5223#if OMP_40_ENABLED
5224 team->t.t_proc_bind = new_proc_bind;
5225#endif
5226
5227 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5228 team->t.t_id));
5229
5230#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005231 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005232#endif
5233
5234 KMP_MB();
5235
5236 return team;
5237 }
5238
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005239 /* reap team if it is too small, then loop back and check the next one */
5240    // not sure if this is wise, but it will be redone during the hot-teams
5241 // rewrite.
5242 /* TODO: Use technique to find the right size hot-team, don't reap them */
Jonathan Peyton30419822017-05-12 18:01:32 +00005243 team = __kmp_reap_team(team);
5244 __kmp_team_pool = team;
5245 }
5246
5247 /* nothing available in the pool, no matter, make a new team! */
5248 KMP_MB();
5249 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5250
5251 /* and set it up */
5252 team->t.t_max_nproc = max_nproc;
5253 /* NOTE well, for some reason allocating one big buffer and dividing it up
5254     seems to really hurt performance a lot on the P4, so let's not use this */
5255 __kmp_allocate_team_arrays(team, max_nproc);
5256
5257 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5258 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5259
5260 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5261 "%p to NULL\n",
5262 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5263 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5264 // memory, no need to duplicate
5265 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5266 // memory, no need to duplicate
5267
5268 if (__kmp_storage_map) {
5269 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5270 }
5271
5272 /* allocate space for arguments */
5273 __kmp_alloc_argv_entries(argc, team, FALSE);
5274 team->t.t_argc = argc;
5275
5276 KA_TRACE(20,
5277 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5278 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5279 { // Initialize barrier data.
5280 int b;
5281 for (b = 0; b < bs_last_barrier; ++b) {
5282 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5283#if USE_DEBUGGER
5284 team->t.t_bar[b].b_master_arrived = 0;
5285 team->t.t_bar[b].b_team_arrived = 0;
5286#endif
5287 }
5288 }
5289
5290#if OMP_40_ENABLED
5291 team->t.t_proc_bind = new_proc_bind;
5292#endif
5293
5294#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005295 __ompt_team_assign_id(team, ompt_parallel_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005296 team->t.ompt_serialized_team_info = NULL;
5297#endif
5298
5299 KMP_MB();
5300
5301 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5302 team->t.t_id));
5303
5304 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005305}
5306
5307/* TODO implement hot-teams at all levels */
5308/* TODO implement lazy thread release on demand (disband request) */
5309
5310/* free the team. return it to the team pool. release all the threads
5311 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005312void __kmp_free_team(kmp_root_t *root,
5313 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5314 int f;
5315 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5316 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005317
Jonathan Peyton30419822017-05-12 18:01:32 +00005318 /* verify state */
5319 KMP_DEBUG_ASSERT(root);
5320 KMP_DEBUG_ASSERT(team);
5321 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5322 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005323
Jonathan Peyton30419822017-05-12 18:01:32 +00005324 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005325#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005326 int level;
5327 kmp_hot_team_ptr_t *hot_teams;
5328 if (master) {
5329 level = team->t.t_active_level - 1;
5330 if (master->th.th_teams_microtask) { // in teams construct?
5331 if (master->th.th_teams_size.nteams > 1) {
5332 ++level; // level was not increased in teams construct for
5333 // team_of_masters
5334 }
5335 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5336 master->th.th_teams_level == team->t.t_level) {
5337 ++level; // level was not increased in teams construct for
5338 // team_of_workers before the parallel
5339 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005340 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005341 hot_teams = master->th.th_hot_teams;
5342 if (level < __kmp_hot_teams_max_level) {
5343 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5344 use_hot_team = 1;
5345 }
5346 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005347#endif // KMP_NESTED_HOT_TEAMS
5348
Jonathan Peyton30419822017-05-12 18:01:32 +00005349 /* team is done working */
5350 TCW_SYNC_PTR(team->t.t_pkfn,
5351 NULL); // Important for Debugging Support Library.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005352#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005353 team->t.t_copyin_counter = 0; // init counter for possible reuse
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005354#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005355 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005356
Jonathan Peyton30419822017-05-12 18:01:32 +00005357 /* if we are non-hot team, release our threads */
5358 if (!use_hot_team) {
5359 if (__kmp_tasking_mode != tskm_immediate_exec) {
5360 // Wait for threads to reach reapable state
5361 for (f = 1; f < team->t.t_nproc; ++f) {
5362 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5363 kmp_info_t *th = team->t.t_threads[f];
5364 volatile kmp_uint32 *state = &th->th.th_reap_state;
5365 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005366#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005367 // On Windows a thread can be killed at any time, check this
5368 DWORD ecode;
5369 if (!__kmp_is_thread_alive(th, &ecode)) {
5370 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5371 break;
5372 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005373#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005374 // first check if thread is sleeping
5375 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5376 if (fl.is_sleeping())
5377 fl.resume(__kmp_gtid_from_thread(th));
5378 KMP_CPU_PAUSE();
5379 }
5380 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005381
Jonathan Peyton30419822017-05-12 18:01:32 +00005382 // Delete task teams
5383 int tt_idx;
5384 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5385 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5386 if (task_team != NULL) {
5387 for (f = 0; f < team->t.t_nproc;
5388 ++f) { // Have all threads unref task teams
5389 team->t.t_threads[f]->th.th_task_team = NULL;
5390 }
5391 KA_TRACE(
5392 20,
5393 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5394 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005395#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005396 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005397#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005398 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005399 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005400 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005401 }
5402
Jonathan Peyton30419822017-05-12 18:01:32 +00005403 // Reset pointer to parent team only for non-hot teams.
5404 team->t.t_parent = NULL;
5405 team->t.t_level = 0;
5406 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005407
Jonathan Peyton30419822017-05-12 18:01:32 +00005408 /* free the worker threads */
5409 for (f = 1; f < team->t.t_nproc; ++f) {
5410 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5411 __kmp_free_thread(team->t.t_threads[f]);
5412 team->t.t_threads[f] = NULL;
5413 }
5414
5415 /* put the team back in the team pool */
5416 /* TODO limit size of team pool, call reap_team if pool too large */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005417 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005418 __kmp_team_pool = (volatile kmp_team_t *)team;
5419 }
5420
5421 KMP_MB();
5422}
Jim Cownie5e8470a2013-09-27 10:38:44 +00005423
5424/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005425kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5426 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005427
Jonathan Peyton30419822017-05-12 18:01:32 +00005428 KMP_DEBUG_ASSERT(team);
5429 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5430 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5431 KMP_DEBUG_ASSERT(team->t.t_threads);
5432 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005433
Jonathan Peyton30419822017-05-12 18:01:32 +00005434 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005435
Jonathan Peyton30419822017-05-12 18:01:32 +00005436 /* free stuff */
5437 __kmp_free_team_arrays(team);
5438 if (team->t.t_argv != &team->t.t_inline_argv[0])
5439 __kmp_free((void *)team->t.t_argv);
5440 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005441
Jonathan Peyton30419822017-05-12 18:01:32 +00005442 KMP_MB();
5443 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005444}
5445
Jim Cownie5e8470a2013-09-27 10:38:44 +00005446// Free the thread. Don't reap it, just place it on the pool of available
5447// threads.
5448//
5449// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5450// binding for the affinity mechanism to be useful.
5451//
5452// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5453// However, we want to avoid a potential performance problem by always
5454// scanning through the list to find the correct point at which to insert
5455// the thread (potential N**2 behavior). To do this we keep track of the
5456// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5457// With single-level parallelism, threads will always be added to the tail
5458// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5459// parallelism, all bets are off and we may need to scan through the entire
5460// free list.
5461//
5462// This change also has a potentially large performance benefit, for some
5463// applications. Previously, as threads were freed from the hot team, they
5464// would be placed back on the free list in inverse order. If the hot team
5465// grew back to its original size, then the freed thread would be placed
5466// back on the hot team in reverse order. This could cause bad cache
5467// locality problems on programs where the size of the hot team regularly
5468// grew and shrunk.
5469//
5470// Now, for single-level parallelism, the OMP tid is always == gtid.
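// Worked example (illustrative values): if the pool currently holds gtids
// {2, 3, 7} and the thread with gtid 5 is freed, the scan below starts at
// __kmp_thread_pool_insert_pt (or at the head of the list if that hint is
// already past gtid 5) and inserts the thread between 3 and 7, keeping the
// pool sorted by gtid.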
Jonathan Peyton30419822017-05-12 18:01:32 +00005471void __kmp_free_thread(kmp_info_t *this_th) {
5472 int gtid;
5473 kmp_info_t **scan;
Jonathan Peytonf4392462017-07-27 20:58:41 +00005474 kmp_root_t *root = this_th->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005475
Jonathan Peyton30419822017-05-12 18:01:32 +00005476 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5477 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005478
Jonathan Peyton30419822017-05-12 18:01:32 +00005479 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005480
Jonathan Peyton30419822017-05-12 18:01:32 +00005481  // When moving the thread to the pool, switch it to wait on its own b_go
5482  // flag and to an uninitialized (NULL) team.
5483 int b;
5484 kmp_balign_t *balign = this_th->th.th_bar;
5485 for (b = 0; b < bs_last_barrier; ++b) {
5486 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5487 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5488 balign[b].bb.team = NULL;
5489 balign[b].bb.leaf_kids = 0;
5490 }
5491 this_th->th.th_task_state = 0;
Andrey Churbanov3336aa02018-03-19 18:05:15 +00005492 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
Jonathan Peyton30419822017-05-12 18:01:32 +00005493
5494 /* put thread back on the free pool */
5495 TCW_PTR(this_th->th.th_team, NULL);
5496 TCW_PTR(this_th->th.th_root, NULL);
5497 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5498
Jonathan Peytonbff8ded2018-01-10 18:24:09 +00005499 /* If the implicit task assigned to this thread can be used by other threads
5500 * -> multiple threads can share the data and try to free the task at
5501 * __kmp_reap_thread at exit. This duplicate use of the task data can happen
5502 * with higher probability when hot team is disabled but can occurs even when
5503 * with higher probability when the hot team is disabled, but can occur even when
5504 __kmp_free_implicit_task(this_th);
5505 this_th->th.th_current_task = NULL;
5506
Jonathan Peyton30419822017-05-12 18:01:32 +00005507 // If the __kmp_thread_pool_insert_pt is already past the new insert
5508 // point, then we need to re-scan the entire list.
5509 gtid = this_th->th.th_info.ds.ds_gtid;
5510 if (__kmp_thread_pool_insert_pt != NULL) {
5511 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5512 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5513 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005514 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005515 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005516
Jonathan Peyton30419822017-05-12 18:01:32 +00005517 // Scan down the list to find the place to insert the thread.
5518 // scan is the address of a link in the list, possibly the address of
5519 // __kmp_thread_pool itself.
5520 //
5521  // In the absence of nested parallelism, the for loop will have 0 iterations.
5522 if (__kmp_thread_pool_insert_pt != NULL) {
5523 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5524 } else {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005525 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005526 }
5527 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5528 scan = &((*scan)->th.th_next_pool))
5529 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005530
Jonathan Peyton30419822017-05-12 18:01:32 +00005531 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5532 // to its address.
5533 TCW_PTR(this_th->th.th_next_pool, *scan);
5534 __kmp_thread_pool_insert_pt = *scan = this_th;
5535 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5536 (this_th->th.th_info.ds.ds_gtid <
5537 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5538 TCW_4(this_th->th.th_in_pool, TRUE);
5539 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005540
Jonathan Peyton30419822017-05-12 18:01:32 +00005541 TCW_4(__kmp_nth, __kmp_nth - 1);
Jonathan Peytonf4392462017-07-27 20:58:41 +00005542 root->r.r_cg_nthreads--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005543
5544#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005545 /* Adjust blocktime back to user setting or default if necessary */
5546 /* Middle initialization might never have occurred */
5547 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5548 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5549 if (__kmp_nth <= __kmp_avail_proc) {
5550 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005551 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005552 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005553#endif /* KMP_ADJUST_BLOCKTIME */
5554
Jonathan Peyton30419822017-05-12 18:01:32 +00005555 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005556}
5557
Jim Cownie5e8470a2013-09-27 10:38:44 +00005558/* ------------------------------------------------------------------------ */
5559
Jonathan Peyton30419822017-05-12 18:01:32 +00005560void *__kmp_launch_thread(kmp_info_t *this_thr) {
5561 int gtid = this_thr->th.th_info.ds.ds_gtid;
5562 /* void *stack_data;*/
5563 kmp_team_t *(*volatile pteam);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005564
Jonathan Peyton30419822017-05-12 18:01:32 +00005565 KMP_MB();
5566 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005567
Jonathan Peyton30419822017-05-12 18:01:32 +00005568 if (__kmp_env_consistency_check) {
5569 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5570 }
5571
5572#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005573 ompt_data_t *thread_data;
5574 if (ompt_enabled.enabled) {
5575 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5576 thread_data->ptr = NULL;
5577
5578 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005579 this_thr->th.ompt_thread_info.wait_id = 0;
Joachim Protze82e94a52017-11-01 10:08:30 +00005580 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5581 if (ompt_enabled.ompt_callback_thread_begin) {
5582 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5583 ompt_thread_worker, thread_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005584 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005585 }
5586#endif
5587
Joachim Protze82e94a52017-11-01 10:08:30 +00005588#if OMPT_SUPPORT
5589 if (ompt_enabled.enabled) {
5590 this_thr->th.ompt_thread_info.state = omp_state_idle;
5591 }
5592#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005593 /* This is the place where threads wait for work */
5594 while (!TCR_4(__kmp_global.g.g_done)) {
5595 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5596 KMP_MB();
5597
5598 /* wait for work to do */
5599 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005600
Jonathan Peyton30419822017-05-12 18:01:32 +00005601 /* No tid yet since not part of a team */
5602 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5603
5604#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005605 if (ompt_enabled.enabled) {
5606 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005607 }
5608#endif
5609
5610 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5611
5612 /* have we been allocated? */
5613 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005614 /* we were just woken up, so run our new task */
5615 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5616 int rc;
5617 KA_TRACE(20,
5618 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5619 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5620 (*pteam)->t.t_pkfn));
5621
5622 updateHWFPControl(*pteam);
5623
5624#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005625 if (ompt_enabled.enabled) {
5626 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00005627 }
5628#endif
5629
Jonathan Peytonf0682ac2018-07-30 17:41:08 +00005630 rc = (*pteam)->t.t_invoke(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00005631 KMP_ASSERT(rc);
5632
Jim Cownie5e8470a2013-09-27 10:38:44 +00005633 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005634 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5635 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5636 (*pteam)->t.t_pkfn));
5637 }
Joachim Protze82e94a52017-11-01 10:08:30 +00005638#if OMPT_SUPPORT
5639 if (ompt_enabled.enabled) {
5640 /* no frame set while outside task */
Joachim Protzec255ca72017-11-05 14:11:10 +00005641 __ompt_get_task_info_object(0)->frame.exit_frame = NULL;
Joachim Protze82e94a52017-11-01 10:08:30 +00005642
5643 this_thr->th.ompt_thread_info.state = omp_state_overhead;
Jonathan Peyton30419822017-05-12 18:01:32 +00005644 }
5645#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00005646 /* join barrier after parallel region */
5647 __kmp_join_barrier(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005648 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005649 }
5650 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005651
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005652#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00005653 if (ompt_enabled.ompt_callback_thread_end) {
5654 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
Jonathan Peyton30419822017-05-12 18:01:32 +00005655 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005656#endif
5657
Jonathan Peyton30419822017-05-12 18:01:32 +00005658 this_thr->th.th_task_team = NULL;
5659 /* run the destructors for the threadprivate data for this thread */
5660 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005661
Jonathan Peyton30419822017-05-12 18:01:32 +00005662 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5663 KMP_MB();
5664 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005665}
5666
5667/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005668
Jonathan Peyton30419822017-05-12 18:01:32 +00005669void __kmp_internal_end_dest(void *specific_gtid) {
5670#if KMP_COMPILER_ICC
5671#pragma warning(push)
5672#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5673// significant bits
5674#endif
5675 // Make sure no significant bits are lost
5676 int gtid = (kmp_intptr_t)specific_gtid - 1;
5677#if KMP_COMPILER_ICC
5678#pragma warning(pop)
5679#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005680
Jonathan Peyton30419822017-05-12 18:01:32 +00005681 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5682 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5683 * this is because 0 is reserved for the nothing-stored case */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005684
Jonathan Peyton30419822017-05-12 18:01:32 +00005685 /* josh: One reason for setting the gtid specific data even when it is being
5686 destroyed by pthread is to allow gtid lookup through thread specific data
5687 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5688 that gets executed in the call to __kmp_internal_end_thread, actually
5689 gets the gtid through the thread specific data. Setting it here seems
5690 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5691 to run smoothly.
5692 todo: get rid of this after we remove the dependence on
5693 __kmp_gtid_get_specific */
5694 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5695 __kmp_gtid_set_specific(gtid);
5696#ifdef KMP_TDATA_GTID
5697 __kmp_gtid = gtid;
5698#endif
5699 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005700}
5701
Jonathan Peyton99016992015-05-26 17:32:53 +00005702#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005703
Jonathan Peyton30419822017-05-12 18:01:32 +00005704// 2009-09-08 (lev): It looks like the destructor does not work. In simple test
5705// cases destructors work perfectly, but in real libomp.so I have no evidence it
5706// is ever called. However, the -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005707
Jonathan Peyton30419822017-05-12 18:01:32 +00005708__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5709 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005710}
5711
Jonathan Peyton30419822017-05-12 18:01:32 +00005712void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005713
5714#endif
5715
Jonathan Peyton30419822017-05-12 18:01:32 +00005716/* [Windows] josh: when the atexit handler is called, there may still be more
5717 than one thread alive */
5718void __kmp_internal_end_atexit(void) {
5719 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5720 /* [Windows]
5721 josh: ideally, we want to completely shut down the library in this atexit
5722 handler, but stat code that depends on thread specific data for gtid fails
5723 because that data becomes unavailable at some point during the shutdown, so
5724 we call __kmp_internal_end_thread instead. We should eventually remove the
5725 dependency on __kmp_get_specific_gtid in the stat code and use
5726 __kmp_internal_end_library to cleanly shut down the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005727
Jonathan Peyton30419822017-05-12 18:01:32 +00005728 // TODO: Can some of this comment about GVS be removed?
5729 I suspect that the offending stat code is executed when the calling thread
5730 tries to clean up a dead root thread's data structures, resulting in GVS
5731 code trying to close the GVS structures for that thread, but since the stat
5732 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5733 the calling thread is cleaning up itself instead of another thread, it get
5734 confused. This happens because allowing a thread to unregister and cleanup
5735 another thread is a recent modification for addressing an issue.
5736 Based on the current design (20050722), a thread may end up
5737 trying to unregister another thread only if thread death does not trigger
5738 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5739 thread specific data destructor function to detect thread death. For
5740 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5741 is nothing. Thus, the workaround is applicable only for Windows static
5742 stat library. */
5743 __kmp_internal_end_library(-1);
5744#if KMP_OS_WINDOWS
5745 __kmp_close_console();
5746#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005747}
5748
Jonathan Peyton30419822017-05-12 18:01:32 +00005749static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5750 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005751
Jonathan Peyton30419822017-05-12 18:01:32 +00005752 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005753
Jonathan Peyton30419822017-05-12 18:01:32 +00005754 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005755
Jonathan Peyton30419822017-05-12 18:01:32 +00005756 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005757
Jonathan Peyton30419822017-05-12 18:01:32 +00005758 if (!is_root) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005759
Jonathan Peyton30419822017-05-12 18:01:32 +00005760 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5761 /* Assume the threads are at the fork barrier here */
5762 KA_TRACE(
5763 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5764 gtid));
5765 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5766 * (GEH) */
5767 ANNOTATE_HAPPENS_BEFORE(thread);
5768 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5769 __kmp_release_64(&flag);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005770 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005771
Jonathan Peyton30419822017-05-12 18:01:32 +00005772 // Terminate OS thread.
5773 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005774
Jonathan Peyton30419822017-05-12 18:01:32 +00005775 // The thread was killed asynchronously. If it was actively
5776 // spinning in the thread pool, decrement the global count.
5777 //
5778 // There is a small timing hole here - if the worker thread was just waking
5779 // up after sleeping in the pool, had reset its th_active_in_pool flag but
5780 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5781 // the global counter might not get updated.
5782 //
5783 // Currently, this can only happen as the library is unloaded,
5784 // so there are no harmful side effects.
5785 if (thread->th.th_active_in_pool) {
5786 thread->th.th_active_in_pool = FALSE;
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00005787 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5788 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
Jonathan Peyton30419822017-05-12 18:01:32 +00005789 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005790
Jonathan Peyton30419822017-05-12 18:01:32 +00005791 // Decrement # of [worker] threads in the pool.
5792 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5793 --__kmp_thread_pool_nth;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005794 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005795
Jonathan Peyton30419822017-05-12 18:01:32 +00005796 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005797
Jonathan Peyton30419822017-05-12 18:01:32 +00005798// Free the fast memory for tasking
5799#if USE_FAST_MEMORY
5800 __kmp_free_fast_memory(thread);
5801#endif /* USE_FAST_MEMORY */
5802
5803 __kmp_suspend_uninitialize_thread(thread);
5804
5805 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5806 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5807
5808 --__kmp_all_nth;
5809// __kmp_nth was decremented when thread is added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005810
5811#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005812 /* Adjust blocktime back to user setting or default if necessary */
5813 /* Middle initialization might never have occurred */
5814 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5815 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5816 if (__kmp_nth <= __kmp_avail_proc) {
5817 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005818 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005819 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005820#endif /* KMP_ADJUST_BLOCKTIME */
5821
Jonathan Peyton30419822017-05-12 18:01:32 +00005822 /* free the memory being used */
5823 if (__kmp_env_consistency_check) {
5824 if (thread->th.th_cons) {
5825 __kmp_free_cons_stack(thread->th.th_cons);
5826 thread->th.th_cons = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005827 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005828 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005829
Jonathan Peyton30419822017-05-12 18:01:32 +00005830 if (thread->th.th_pri_common != NULL) {
5831 __kmp_free(thread->th.th_pri_common);
5832 thread->th.th_pri_common = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005833 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005834
Jonathan Peyton30419822017-05-12 18:01:32 +00005835 if (thread->th.th_task_state_memo_stack != NULL) {
5836 __kmp_free(thread->th.th_task_state_memo_stack);
5837 thread->th.th_task_state_memo_stack = NULL;
5838 }
5839
5840#if KMP_USE_BGET
5841 if (thread->th.th_local.bget_data != NULL) {
5842 __kmp_finalize_bget(thread);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005843 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005844#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005845
Alp Toker98758b02014-03-02 04:12:06 +00005846#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005847 if (thread->th.th_affin_mask != NULL) {
5848 KMP_CPU_FREE(thread->th.th_affin_mask);
5849 thread->th.th_affin_mask = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005850 }
Alp Toker98758b02014-03-02 04:12:06 +00005851#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005852
Jonathan Peytonf6399362018-07-09 17:51:13 +00005853#if KMP_USE_HIER_SCHED
5854 if (thread->th.th_hier_bar_data != NULL) {
5855 __kmp_free(thread->th.th_hier_bar_data);
5856 thread->th.th_hier_bar_data = NULL;
5857 }
5858#endif
5859
Jonathan Peyton30419822017-05-12 18:01:32 +00005860 __kmp_reap_team(thread->th.th_serial_team);
5861 thread->th.th_serial_team = NULL;
5862 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005863
Jonathan Peyton30419822017-05-12 18:01:32 +00005864 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005865
5866} // __kmp_reap_thread
5867
Jonathan Peyton30419822017-05-12 18:01:32 +00005868static void __kmp_internal_end(void) {
5869 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005870
Jonathan Peyton30419822017-05-12 18:01:32 +00005871 /* First, unregister the library */
5872 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005873
Jonathan Peyton30419822017-05-12 18:01:32 +00005874#if KMP_OS_WINDOWS
5875 /* In Win static library, we can't tell when a root actually dies, so we
5876 reclaim the data structures for any root threads that have died but not
5877 unregistered themselves, in order to shut down cleanly.
5878 In Win dynamic library we also can't tell when a thread dies. */
5879 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
5880// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005881#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005882
Jonathan Peyton30419822017-05-12 18:01:32 +00005883 for (i = 0; i < __kmp_threads_capacity; i++)
5884 if (__kmp_root[i])
5885 if (__kmp_root[i]->r.r_active)
5886 break;
5887 KMP_MB(); /* Flush all pending memory write invalidates. */
5888 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5889
5890 if (i < __kmp_threads_capacity) {
5891#if KMP_USE_MONITOR
5892 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5893 KMP_MB(); /* Flush all pending memory write invalidates. */
5894
Jonathan Peyton94a114f2017-10-20 19:30:57 +00005895 // Need to check that monitor was initialized before reaping it. If we are
5896 // called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
5897 // __kmp_monitor will appear to contain valid data, but it is only valid in
5898 // the parent process, not the child.
Jonathan Peyton30419822017-05-12 18:01:32 +00005899 // New behavior (201008): instead of keying off of the flag
5900 // __kmp_init_parallel, the monitor thread creation is keyed off
5901 // of the new flag __kmp_init_monitor.
5902 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5903 if (TCR_4(__kmp_init_monitor)) {
5904 __kmp_reap_monitor(&__kmp_monitor);
5905 TCW_4(__kmp_init_monitor, 0);
5906 }
5907 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5908 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5909#endif // KMP_USE_MONITOR
5910 } else {
5911/* TODO move this to cleanup code */
5912#ifdef KMP_DEBUG
5913 /* make sure that everything has properly ended */
5914 for (i = 0; i < __kmp_threads_capacity; i++) {
5915 if (__kmp_root[i]) {
5916 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
5917 // there can be uber threads alive here
5918 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
5919 }
5920 }
5921#endif
5922
5923 KMP_MB();
5924
5925 // Reap the worker threads.
5926 // This is valid for now, but be careful if threads are reaped sooner.
5927 while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
5928 // Get the next thread from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005929 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005930 __kmp_thread_pool = thread->th.th_next_pool;
5931 // Reap it.
5932 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5933 thread->th.th_next_pool = NULL;
5934 thread->th.th_in_pool = FALSE;
5935 __kmp_reap_thread(thread, 0);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005936 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005937 __kmp_thread_pool_insert_pt = NULL;
5938
5939 // Reap teams.
5940 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
5941 // Get the next team from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005942 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005943 __kmp_team_pool = team->t.t_next_pool;
5944 // Reap it.
5945 team->t.t_next_pool = NULL;
5946 __kmp_reap_team(team);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00005947 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005948
5949 __kmp_reap_task_teams();
5950
Jonathan Peytona764af62018-07-19 19:17:00 +00005951#if KMP_OS_UNIX
5952 // Threads that are not reaped should not access any resources since they
5953 // are going to be deallocated soon, so the shutdown sequence should wait
5954 // until all threads either exit the final spin-waiting loop or begin
5955 // sleeping after the given blocktime.
5956 for (i = 0; i < __kmp_threads_capacity; i++) {
5957 kmp_info_t *thr = __kmp_threads[i];
5958 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
5959 KMP_CPU_PAUSE();
5960 }
5961#endif
5962
Jonathan Peyton30419822017-05-12 18:01:32 +00005963 for (i = 0; i < __kmp_threads_capacity; ++i) {
5964 // TBD: Add some checking...
5965 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5966 }
5967
5968 /* Make sure all threadprivate destructors get run by joining with all
5969 worker threads before resetting this flag */
5970 TCW_SYNC_4(__kmp_init_common, FALSE);
5971
5972 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
5973 KMP_MB();
5974
5975#if KMP_USE_MONITOR
5976 // See note above: One of the possible fixes for CQ138434 / CQ140126
5977 //
5978 // FIXME: push both code fragments down and CSE them?
5979 // push them into __kmp_cleanup() ?
5980 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5981 if (TCR_4(__kmp_init_monitor)) {
5982 __kmp_reap_monitor(&__kmp_monitor);
5983 TCW_4(__kmp_init_monitor, 0);
5984 }
5985 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5986 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5987#endif
5988 } /* else !__kmp_global.t_active */
5989 TCW_4(__kmp_init_gtid, FALSE);
5990 KMP_MB(); /* Flush all pending memory write invalidates. */
5991
5992 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005993#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005994 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005995#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005996}
5997
Jonathan Peyton30419822017-05-12 18:01:32 +00005998void __kmp_internal_end_library(int gtid_req) {
5999 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6000 /* this shouldn't be a race condition because __kmp_internal_end() is the
6001 only place to clear __kmp_serial_init */
6002 /* we'll check this later too, after we get the lock */
6003 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6004 // redundant, because the next check will work in any case.
6005 if (__kmp_global.g.g_abort) {
6006 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6007 /* TODO abort? */
6008 return;
6009 }
6010 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6011 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6012 return;
6013 }
6014
6015 KMP_MB(); /* Flush all pending memory write invalidates. */
6016
6017 /* find out who we are and what we should do */
6018 {
6019 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6020 KA_TRACE(
6021 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6022 if (gtid == KMP_GTID_SHUTDOWN) {
6023 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6024 "already shutdown\n"));
6025 return;
6026 } else if (gtid == KMP_GTID_MONITOR) {
6027 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6028 "registered, or system shutdown\n"));
6029 return;
6030 } else if (gtid == KMP_GTID_DNE) {
6031 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6032 "shutdown\n"));
6033 /* we don't know who we are, but we may still shut down the library */
6034 } else if (KMP_UBER_GTID(gtid)) {
6035 /* unregister ourselves as an uber thread. gtid is no longer valid */
6036 if (__kmp_root[gtid]->r.r_active) {
6037 __kmp_global.g.g_abort = -1;
6038 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6039 KA_TRACE(10,
6040 ("__kmp_internal_end_library: root still active, abort T#%d\n",
6041 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006042 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006043 } else {
6044 KA_TRACE(
6045 10,
6046 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6047 __kmp_unregister_root_current_thread(gtid);
6048 }
6049 } else {
6050/* worker threads may call this function through the atexit handler, if they
6051 * call exit() */
6052/* For now, skip the usual subsequent processing and just dump the debug buffer.
6053 TODO: do a thorough shutdown instead */
6054#ifdef DUMP_DEBUG_ON_EXIT
6055 if (__kmp_debug_buf)
6056 __kmp_dump_debug_buffer();
6057#endif
6058 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006059 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006060 }
6061 /* synchronize the termination process */
6062 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006063
Jonathan Peyton30419822017-05-12 18:01:32 +00006064 /* have we already finished */
6065 if (__kmp_global.g.g_abort) {
6066 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6067 /* TODO abort? */
6068 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6069 return;
6070 }
6071 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6072 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6073 return;
6074 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006075
Jonathan Peyton30419822017-05-12 18:01:32 +00006076 /* We need this lock to enforce mutex between this reading of
6077 __kmp_threads_capacity and the writing by __kmp_register_root.
6078 Alternatively, we can use a counter of roots that is atomically updated by
6079 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6080 __kmp_internal_end_*. */
6081 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006082
Jonathan Peyton30419822017-05-12 18:01:32 +00006083 /* now we can safely conduct the actual termination */
6084 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006085
Jonathan Peyton30419822017-05-12 18:01:32 +00006086 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6087 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006088
Jonathan Peyton30419822017-05-12 18:01:32 +00006089 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006090
Jonathan Peyton30419822017-05-12 18:01:32 +00006091#ifdef DUMP_DEBUG_ON_EXIT
6092 if (__kmp_debug_buf)
6093 __kmp_dump_debug_buffer();
6094#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006095
Jonathan Peyton30419822017-05-12 18:01:32 +00006096#if KMP_OS_WINDOWS
6097 __kmp_close_console();
6098#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006099
Jonathan Peyton30419822017-05-12 18:01:32 +00006100 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006101
6102} // __kmp_internal_end_library
6103
Jonathan Peyton30419822017-05-12 18:01:32 +00006104void __kmp_internal_end_thread(int gtid_req) {
6105 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006106
Jonathan Peyton30419822017-05-12 18:01:32 +00006107 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6108 /* this shouldn't be a race condition because __kmp_internal_end() is the
6109 * only place to clear __kmp_serial_init */
6110 /* we'll check this later too, after we get the lock */
6111 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6112 // redundant, because the next check will work in any case.
6113 if (__kmp_global.g.g_abort) {
6114 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6115 /* TODO abort? */
6116 return;
6117 }
6118 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6119 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6120 return;
6121 }
6122
6123 KMP_MB(); /* Flush all pending memory write invalidates. */
6124
6125 /* find out who we are and what we should do */
6126 {
6127 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6128 KA_TRACE(10,
6129 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6130 if (gtid == KMP_GTID_SHUTDOWN) {
6131 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6132 "already shutdown\n"));
6133 return;
6134 } else if (gtid == KMP_GTID_MONITOR) {
6135 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6136 "registered, or system shutdown\n"));
6137 return;
6138 } else if (gtid == KMP_GTID_DNE) {
6139 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6140 "shutdown\n"));
6141 return;
6142 /* we don't know who we are */
6143 } else if (KMP_UBER_GTID(gtid)) {
6144 /* unregister ourselves as an uber thread. gtid is no longer valid */
6145 if (__kmp_root[gtid]->r.r_active) {
6146 __kmp_global.g.g_abort = -1;
6147 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6148 KA_TRACE(10,
6149 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6150 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006151 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006152 } else {
6153 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6154 gtid));
6155 __kmp_unregister_root_current_thread(gtid);
6156 }
6157 } else {
6158 /* just a worker thread, let's leave */
6159 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6160
6161 if (gtid >= 0) {
6162 __kmp_threads[gtid]->th.th_task_team = NULL;
6163 }
6164
6165 KA_TRACE(10,
6166 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6167 gtid));
6168 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006169 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006170 }
6171#if defined KMP_DYNAMIC_LIB
6172 // AC: let's not shut down the Linux* OS dynamic library at the exit of an uber
6173 // thread, because it is better to shut down later in the library destructor.
6174 // The reason for this change is a performance problem when a non-OpenMP thread
6175 // in a loop forks and joins many OpenMP threads. We can save a lot of time
6176 // keeping worker threads alive until the program shuts down.
6177 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6178 // and Windows (DPD200287443) that occurs when using critical sections from
6179 // foreign threads.
6180 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6181 return;
6182#endif
6183 /* synchronize the termination process */
6184 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006185
Jonathan Peyton30419822017-05-12 18:01:32 +00006186 /* have we already finished */
6187 if (__kmp_global.g.g_abort) {
6188 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6189 /* TODO abort? */
6190 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6191 return;
6192 }
6193 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6194 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6195 return;
6196 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006197
Jonathan Peyton30419822017-05-12 18:01:32 +00006198 /* We need this lock to enforce mutex between this reading of
6199 __kmp_threads_capacity and the writing by __kmp_register_root.
6200 Alternatively, we can use a counter of roots that is atomically updated by
6201 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6202 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006203
Jonathan Peyton30419822017-05-12 18:01:32 +00006204 /* should we finish the run-time? are all siblings done? */
6205 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006206
Jonathan Peyton30419822017-05-12 18:01:32 +00006207 for (i = 0; i < __kmp_threads_capacity; ++i) {
6208 if (KMP_UBER_GTID(i)) {
6209 KA_TRACE(
6210 10,
6211 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6212 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6213 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6214 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006215 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006216 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006217
Jonathan Peyton30419822017-05-12 18:01:32 +00006218 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006219
Jonathan Peyton30419822017-05-12 18:01:32 +00006220 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006221
Jonathan Peyton30419822017-05-12 18:01:32 +00006222 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6223 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006224
Jonathan Peyton30419822017-05-12 18:01:32 +00006225 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006226
Jonathan Peyton30419822017-05-12 18:01:32 +00006227#ifdef DUMP_DEBUG_ON_EXIT
6228 if (__kmp_debug_buf)
6229 __kmp_dump_debug_buffer();
6230#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006231} // __kmp_internal_end_thread
6232
Jonathan Peyton30419822017-05-12 18:01:32 +00006233// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006234// Library registration stuff.
6235
Jonathan Peyton30419822017-05-12 18:01:32 +00006236static long __kmp_registration_flag = 0;
6237// Random value used to indicate library initialization.
6238static char *__kmp_registration_str = NULL;
6239// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006240
Jonathan Peyton30419822017-05-12 18:01:32 +00006241static inline char *__kmp_reg_status_name() {
6242 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6243 each thread. If registration and unregistration go in different threads
6244 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
6245 env var cannot be found, because the name will contain a different pid. */
6246 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006247} // __kmp_reg_status_get
6248
Jonathan Peyton30419822017-05-12 18:01:32 +00006249void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006250
Jonathan Peyton30419822017-05-12 18:01:32 +00006251 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6252 int done = 0;
6253 union {
6254 double dtime;
6255 long ltime;
6256 } time;
6257#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6258 __kmp_initialize_system_tick();
6259#endif
6260 __kmp_read_system_time(&time.dtime);
6261 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6262 __kmp_registration_str =
6263 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6264 __kmp_registration_flag, KMP_LIBRARY_FILE);
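  // The resulting value has the form "<flag address>-<flag value>-<library
  // file>", e.g. (hypothetical) "0x7f12ab340020-cafe0042-libomp.so". The flag
  // mixes a fixed tag with the low bits of the current time, so each process
  // gets an (almost certainly) unique value to publish and later verify.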
Jim Cownie5e8470a2013-09-27 10:38:44 +00006265
Jonathan Peyton30419822017-05-12 18:01:32 +00006266 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6267 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006268
Jonathan Peyton30419822017-05-12 18:01:32 +00006269 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006270
Jonathan Peyton30419822017-05-12 18:01:32 +00006271 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006272
Jonathan Peyton30419822017-05-12 18:01:32 +00006273 // Set the environment variable, but do not overwrite it if it already exists.
6274 __kmp_env_set(name, __kmp_registration_str, 0);
6275 // Check that the variable was written.
6276 value = __kmp_env_get(name);
6277 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006278
Jonathan Peyton30419822017-05-12 18:01:32 +00006279 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006280
Jonathan Peyton30419822017-05-12 18:01:32 +00006281 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006282
Jonathan Peyton30419822017-05-12 18:01:32 +00006283 // Oops. Write failed. Another copy of OpenMP RTL is in memory.
6284 // Check whether it is alive or dead.
6285 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6286 char *tail = value;
6287 char *flag_addr_str = NULL;
6288 char *flag_val_str = NULL;
6289 char const *file_name = NULL;
6290 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6291 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6292 file_name = tail;
6293 if (tail != NULL) {
6294 long *flag_addr = 0;
6295 long flag_val = 0;
6296 KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6297 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6298 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6299 // First, check whether environment-encoded address is mapped into
6300 // addr space.
6301 // If so, dereference it to see if it still has the right value.
6302 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6303 neighbor = 1;
6304 } else {
6305 // If not, then we know the other copy of the library is no longer
6306 // running.
6307 neighbor = 2;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006308 }
6309 }
6310 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006311 switch (neighbor) {
6312 case 0: // Cannot parse environment variable -- neighbor status unknown.
6313 // Assume it is the incompatible format of future version of the
6314 // library. Assume the other library is alive.
6315 // WARN( ... ); // TODO: Issue a warning.
6316 file_name = "unknown library";
6317 // Attention! Falling to the next case. That's intentional.
6318 case 1: { // Neighbor is alive.
6319 // Check it is allowed.
6320 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6321 if (!__kmp_str_match_true(duplicate_ok)) {
6322 // That's not allowed. Issue fatal error.
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006323 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6324 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006325 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006326 KMP_INTERNAL_FREE(duplicate_ok);
6327 __kmp_duplicate_library_ok = 1;
6328 done = 1; // Exit the loop.
6329 } break;
6330 case 2: { // Neighbor is dead.
6331 // Clear the variable and try to register library again.
6332 __kmp_env_unset(name);
6333 } break;
6334 default: { KMP_DEBUG_ASSERT(0); } break;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006335 }
6336 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006337 KMP_INTERNAL_FREE((void *)value);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006338 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006339 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006340
6341} // func __kmp_register_library_startup
6342
Jonathan Peyton30419822017-05-12 18:01:32 +00006343void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006344
Jonathan Peyton30419822017-05-12 18:01:32 +00006345 char *name = __kmp_reg_status_name();
6346 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006347
Jonathan Peyton30419822017-05-12 18:01:32 +00006348 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6349 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6350 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6351 // Ok, this is our variable. Delete it.
6352 __kmp_env_unset(name);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006353 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006354
Jonathan Peyton30419822017-05-12 18:01:32 +00006355 KMP_INTERNAL_FREE(__kmp_registration_str);
6356 KMP_INTERNAL_FREE(value);
6357 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006358
Jonathan Peyton30419822017-05-12 18:01:32 +00006359 __kmp_registration_flag = 0;
6360 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006361
6362} // __kmp_unregister_library
6363
Jim Cownie5e8470a2013-09-27 10:38:44 +00006364// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006365// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006366
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006367#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006368
Jonathan Peyton30419822017-05-12 18:01:32 +00006369static void __kmp_check_mic_type() {
6370 kmp_cpuid_t cpuid_state = {0};
6371 kmp_cpuid_t *cs_p = &cpuid_state;
6372 __kmp_x86_cpuid(1, 0, cs_p);
6373 // We don't support mic1 at the moment
6374 if ((cs_p->eax & 0xff0) == 0xB10) {
6375 __kmp_mic_type = mic2;
6376 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6377 __kmp_mic_type = mic3;
6378 } else {
6379 __kmp_mic_type = non_mic;
6380 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006381}
6382
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006383#endif /* KMP_MIC_SUPPORTED */
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006384
Jonathan Peyton30419822017-05-12 18:01:32 +00006385static void __kmp_do_serial_initialize(void) {
6386 int i, gtid;
6387 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006388
Jonathan Peyton30419822017-05-12 18:01:32 +00006389 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006390
Jonathan Peyton30419822017-05-12 18:01:32 +00006391 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6392 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6393 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6394 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6395 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006396
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006397#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006398 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006399#endif
6400
Jonathan Peyton30419822017-05-12 18:01:32 +00006401 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006402
Jonathan Peyton30419822017-05-12 18:01:32 +00006403 /* Initialize internal memory allocator */
6404 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006405
Jonathan Peyton30419822017-05-12 18:01:32 +00006406 /* Register the library startup via an environment variable and check to see
6407 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006408
Jonathan Peyton30419822017-05-12 18:01:32 +00006409 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006410
Jonathan Peyton30419822017-05-12 18:01:32 +00006411 /* TODO reinitialization of library */
6412 if (TCR_4(__kmp_global.g.g_done)) {
6413 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6414 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006415
Jonathan Peyton30419822017-05-12 18:01:32 +00006416 __kmp_global.g.g_abort = 0;
6417 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006418
Jonathan Peyton30419822017-05-12 18:01:32 +00006419/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006420#if KMP_USE_ADAPTIVE_LOCKS
6421#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006422 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006423#endif
6424#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006425#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006426 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006427#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006428 __kmp_init_lock(&__kmp_global_lock);
6429 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6430 __kmp_init_lock(&__kmp_debug_lock);
6431 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6432 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6433 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6434 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6435 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6436 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6437 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6438 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6439 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6440 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6441 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6442 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6443 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6444 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6445 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006446#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006447 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006448#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006449 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006450
Jonathan Peyton30419822017-05-12 18:01:32 +00006451 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006452
Jonathan Peyton30419822017-05-12 18:01:32 +00006453 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006454
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006455#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006456 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006457#endif
6458
Jonathan Peyton30419822017-05-12 18:01:32 +00006459// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006460#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006461 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006462#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006463 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006464
Jonathan Peyton30419822017-05-12 18:01:32 +00006465 // From __kmp_init_dflt_team_nth()
6466 /* assume the entire machine will be used */
6467 __kmp_dflt_team_nth_ub = __kmp_xproc;
6468 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6469 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6470 }
6471 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6472 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6473 }
6474 __kmp_max_nth = __kmp_sys_max_nth;
Jonathan Peytonf4392462017-07-27 20:58:41 +00006475 __kmp_cg_max_nth = __kmp_sys_max_nth;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00006476 __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
6477 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6478 __kmp_teams_max_nth = __kmp_sys_max_nth;
6479 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006480
Jonathan Peyton30419822017-05-12 18:01:32 +00006481 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6482 // part
6483 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
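  // KMP_DEFAULT_BLOCKTIME is expressed in milliseconds; when the monitor
  // thread is in use, the wakeup rate and bt intervals below are derived from
  // this blocktime value.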
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006484#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006485 __kmp_monitor_wakeups =
6486 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6487 __kmp_bt_intervals =
6488 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006489#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006490 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6491 __kmp_library = library_throughput;
6492 // From KMP_SCHEDULE initialization
6493 __kmp_static = kmp_sch_static_balanced;
6494// AC: do not use analytical here, because it is non-monotonous
6495//__kmp_guided = kmp_sch_guided_iterative_chunked;
6496//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6497// need to repeat assignment
6498// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
6499// bit control and barrier method control parts
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006500#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006501#define kmp_reduction_barrier_gather_bb ((int)1)
6502#define kmp_reduction_barrier_release_bb ((int)1)
6503#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6504#define kmp_reduction_barrier_release_pat bp_hyper_bar
6505#endif // KMP_FAST_REDUCTION_BARRIER
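  // Each barrier type gets gather/release branch bits (presumably 2^bits is
  // the fan-in/fan-out for the tree and hyper patterns) plus a gather/release
  // pattern; defaults come from __kmp_barrier_*_dflt and the reduction barrier
  // is overridden above when KMP_FAST_REDUCTION_BARRIER is enabled.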
6506 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6507 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6508 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6509 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6510 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6511#if KMP_FAST_REDUCTION_BARRIER
6512 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6513 // lin_64 ): hyper,1
6514 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6515 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6516 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6517 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006518 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006519#endif // KMP_FAST_REDUCTION_BARRIER
6520 }
6521#if KMP_FAST_REDUCTION_BARRIER
6522#undef kmp_reduction_barrier_release_pat
6523#undef kmp_reduction_barrier_gather_pat
6524#undef kmp_reduction_barrier_release_bb
6525#undef kmp_reduction_barrier_gather_bb
6526#endif // KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006527#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006528 if (__kmp_mic_type == mic2) { // KNC
6529 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6530 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6531 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6532 1; // forkjoin release
6533 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6534 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6535 }
6536#if KMP_FAST_REDUCTION_BARRIER
6537 if (__kmp_mic_type == mic2) { // KNC
6538 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6539 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6540 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006541#endif // KMP_FAST_REDUCTION_BARRIER
6542#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006543
Jonathan Peyton30419822017-05-12 18:01:32 +00006544// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006545#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006546 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006547#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006548 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006549#endif
6550
Jonathan Peyton30419822017-05-12 18:01:32 +00006551 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6552 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006553
Jonathan Peyton30419822017-05-12 18:01:32 +00006554 __kmp_global.g.g_dynamic = FALSE;
6555 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006556
Jonathan Peyton30419822017-05-12 18:01:32 +00006557 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006558
Jonathan Peyton30419822017-05-12 18:01:32 +00006559// Print all messages in message catalog for testing purposes.
6560#ifdef KMP_DEBUG
6561 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6562 if (__kmp_str_match_true(val)) {
6563 kmp_str_buf_t buffer;
6564 __kmp_str_buf_init(&buffer);
6565 __kmp_i18n_dump_catalog(&buffer);
6566 __kmp_printf("%s", buffer.str);
6567 __kmp_str_buf_free(&buffer);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006568 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006569 __kmp_env_free(&val);
6570#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006571
Jonathan Peyton30419822017-05-12 18:01:32 +00006572 __kmp_threads_capacity =
6573 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6574 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6575 __kmp_tp_capacity = __kmp_default_tp_capacity(
6576 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006577
Jonathan Peyton30419822017-05-12 18:01:32 +00006578 // If the library is shut down properly, both pools must be NULL. Just in
6579 // case, set them to NULL -- some memory may leak, but subsequent code will
6580 // work even if pools are not freed.
6581 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6582 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6583 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6584 __kmp_thread_pool = NULL;
6585 __kmp_thread_pool_insert_pt = NULL;
6586 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006587
Jonathan Peyton30419822017-05-12 18:01:32 +00006588 /* Allocate all of the variable sized records */
6589 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6590 * expandable */
6591 /* Since allocation is cache-aligned, just add extra padding at the end */
6592 size =
6593 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6594 CACHE_LINE;
6595 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6596 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6597 sizeof(kmp_info_t *) * __kmp_threads_capacity);
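  // __kmp_threads and __kmp_root share one cache-aligned allocation:
  // __kmp_root starts immediately after the __kmp_threads_capacity slots
  // reserved for __kmp_threads.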
Jim Cownie5e8470a2013-09-27 10:38:44 +00006598
Jonathan Peyton30419822017-05-12 18:01:32 +00006599 /* init thread counts */
6600 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6601 0); // Asserts fail if the library is reinitializing and
6602 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6603 __kmp_all_nth = 0;
6604 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006605
Jonathan Peyton30419822017-05-12 18:01:32 +00006606 /* setup the uber master thread and hierarchy */
6607 gtid = __kmp_register_root(TRUE);
6608 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6609 KMP_ASSERT(KMP_UBER_GTID(gtid));
6610 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006611
Jonathan Peyton30419822017-05-12 18:01:32 +00006612 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006613
Jonathan Peyton30419822017-05-12 18:01:32 +00006614 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006615
Jonathan Peyton30419822017-05-12 18:01:32 +00006616#if KMP_OS_UNIX
6617 /* invoke the child fork handler */
6618 __kmp_register_atfork();
6619#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006620
Jonathan Peyton30419822017-05-12 18:01:32 +00006621#if !defined KMP_DYNAMIC_LIB
6622 {
6623 /* Invoke the exit handler when the program finishes, only for static
6624 library. For dynamic library, we already have _fini and DllMain. */
6625 int rc = atexit(__kmp_internal_end_atexit);
6626 if (rc != 0) {
Jonathan Peyton6a393f72017-09-05 15:43:58 +00006627 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6628 __kmp_msg_null);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00006629 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006630 }
6631#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006632
Jonathan Peyton30419822017-05-12 18:01:32 +00006633#if KMP_HANDLE_SIGNALS
6634#if KMP_OS_UNIX
6635 /* NOTE: make sure that this is called before the user installs their own
6636 signal handlers so that the user handlers are called first. this way they
6637 can return false, not call our handler, avoid terminating the library, and
6638 continue execution where they left off. */
6639 __kmp_install_signals(FALSE);
6640#endif /* KMP_OS_UNIX */
6641#if KMP_OS_WINDOWS
6642 __kmp_install_signals(TRUE);
6643#endif /* KMP_OS_WINDOWS */
6644#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006645
Jonathan Peyton30419822017-05-12 18:01:32 +00006646 /* we have finished the serial initialization */
6647 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006648
Jonathan Peyton30419822017-05-12 18:01:32 +00006649 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006650
Jonathan Peyton30419822017-05-12 18:01:32 +00006651 if (__kmp_settings) {
6652 __kmp_env_print();
6653 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006654
6655#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006656 if (__kmp_display_env || __kmp_display_env_verbose) {
6657 __kmp_env_print_2();
6658 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006659#endif // OMP_40_ENABLED
6660
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006661#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006662 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006663#endif
6664
Jonathan Peyton30419822017-05-12 18:01:32 +00006665 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006666
Jonathan Peyton30419822017-05-12 18:01:32 +00006667 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006668}
6669
Jonathan Peyton30419822017-05-12 18:01:32 +00006670void __kmp_serial_initialize(void) {
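  // Double-checked initialization: __kmp_init_serial is tested once before
  // taking the bootstrap lock (fast path) and again after acquiring it, in
  // case another thread finished initialization in between. The same pattern
  // is used by __kmp_middle_initialize and __kmp_parallel_initialize below.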
6671 if (__kmp_init_serial) {
6672 return;
6673 }
6674 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6675 if (__kmp_init_serial) {
6676 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6677 return;
6678 }
6679 __kmp_do_serial_initialize();
6680 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6681}
6682
6683static void __kmp_do_middle_initialize(void) {
6684 int i, j;
6685 int prev_dflt_team_nth;
6686
6687 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006688 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006689 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006690
Jonathan Peyton30419822017-05-12 18:01:32 +00006691 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006692
Jonathan Peyton30419822017-05-12 18:01:32 +00006693 // Save the previous value for the __kmp_dflt_team_nth so that
6694 // we can avoid some reinitialization if it hasn't changed.
6695 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006696
Alp Toker98758b02014-03-02 04:12:06 +00006697#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006698 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6699 // number of cores on the machine.
6700 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006701
Jonathan Peyton30419822017-05-12 18:01:32 +00006702 // Run through the __kmp_threads array and set the affinity mask
6703 // for each root thread that is currently registered with the RTL.
6704 for (i = 0; i < __kmp_threads_capacity; i++) {
6705 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6706 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006707 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006708 }
Alp Toker98758b02014-03-02 04:12:06 +00006709#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006710
Jonathan Peyton30419822017-05-12 18:01:32 +00006711 KMP_ASSERT(__kmp_xproc > 0);
6712 if (__kmp_avail_proc == 0) {
6713 __kmp_avail_proc = __kmp_xproc;
6714 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006715
Jonathan Peyton30419822017-05-12 18:01:32 +00006716 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6717 // correct them now
6718 j = 0;
6719 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6720 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6721 __kmp_avail_proc;
6722 j++;
6723 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006724
Jonathan Peyton30419822017-05-12 18:01:32 +00006725 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006726#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006727 // Default #threads = #cores
6728 __kmp_dflt_team_nth = __kmp_ncores;
6729 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6730 "__kmp_ncores (%d)\n",
6731 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006732#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006733 // Default #threads = #available OS procs
6734 __kmp_dflt_team_nth = __kmp_avail_proc;
6735 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6736 "__kmp_avail_proc(%d)\n",
6737 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006738#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006739 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006740
Jonathan Peyton30419822017-05-12 18:01:32 +00006741 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6742 __kmp_dflt_team_nth = KMP_MIN_NTH;
6743 }
6744 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6745 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6746 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006747
Jonathan Peyton30419822017-05-12 18:01:32 +00006748 // There's no harm in continuing if the following check fails,
6749 // but it indicates an error in the previous logic.
6750 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006751
Jonathan Peyton30419822017-05-12 18:01:32 +00006752 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6753 // Run through the __kmp_threads array and set the num threads icv for each
6754 // root thread that is currently registered with the RTL (which has not
6755 // already explicitly set its nthreads-var with a call to
6756 // omp_set_num_threads()).
6757 for (i = 0; i < __kmp_threads_capacity; i++) {
6758 kmp_info_t *thread = __kmp_threads[i];
6759 if (thread == NULL)
6760 continue;
6761 if (thread->th.th_current_task->td_icvs.nproc != 0)
6762 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006763
Jonathan Peyton30419822017-05-12 18:01:32 +00006764 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006765 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006766 }
6767 KA_TRACE(
6768 20,
6769 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6770 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006771
6772#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006773 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6774 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6775 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6776 if (__kmp_nth > __kmp_avail_proc) {
6777 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006778 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006779 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006780#endif /* KMP_ADJUST_BLOCKTIME */
6781
Jonathan Peyton30419822017-05-12 18:01:32 +00006782 /* we have finished middle initialization */
6783 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006784
Jonathan Peyton30419822017-05-12 18:01:32 +00006785 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006786}
6787
Jonathan Peyton30419822017-05-12 18:01:32 +00006788void __kmp_middle_initialize(void) {
6789 if (__kmp_init_middle) {
6790 return;
6791 }
6792 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6793 if (__kmp_init_middle) {
6794 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6795 return;
6796 }
6797 __kmp_do_middle_initialize();
6798 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6799}
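// __kmp_middle_initialize() (and __kmp_parallel_initialize() below) use the
// double-checked initialization idiom around a bootstrap lock. A minimal,
// generic sketch of that pattern with a hypothetical flag/lock pair (not the
// real runtime state):
#if 0
static volatile int sketch_init_done = 0;     // hypothetical "init done" flag
static kmp_bootstrap_lock_t sketch_init_lock; // hypothetical bootstrap lock

static void sketch_init_once(void (*do_init)(void)) {
  if (sketch_init_done) // fast path: already initialized, no lock taken
    return;
  __kmp_acquire_bootstrap_lock(&sketch_init_lock);
  if (!sketch_init_done) { // re-check under the lock
    do_init(); // heavy one-time work
    TCW_SYNC_4(sketch_init_done, TRUE); // publish with a synchronized store
  }
  __kmp_release_bootstrap_lock(&sketch_init_lock);
}
#endif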
6800
6801void __kmp_parallel_initialize(void) {
6802 int gtid = __kmp_entry_gtid(); // this might be a new root
6803
6804 /* synchronize parallel initialization (for sibling) */
6805 if (TCR_4(__kmp_init_parallel))
6806 return;
6807 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6808 if (TCR_4(__kmp_init_parallel)) {
6809 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6810 return;
6811 }
6812
6813 /* TODO reinitialization after we have already shut down */
6814 if (TCR_4(__kmp_global.g.g_done)) {
6815 KA_TRACE(
6816 10,
6817 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6818 __kmp_infinite_loop();
6819 }
6820
6821 /* jc: The lock __kmp_initz_lock is already held, so calling
6822 __kmp_serial_initialize would cause a deadlock. So we call
6823 __kmp_do_serial_initialize directly. */
6824 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006825 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006826 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006827
Jonathan Peyton30419822017-05-12 18:01:32 +00006828 /* begin initialization */
6829 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6830 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006831
6832#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006833 // Save the FP control regs.
6834 // Worker threads will set theirs to these values at thread startup.
6835 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6836 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6837 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006838#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6839
6840#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00006841#if KMP_HANDLE_SIGNALS
6842 /* must be after __kmp_serial_initialize */
6843 __kmp_install_signals(TRUE);
6844#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006845#endif
6846
Jonathan Peyton30419822017-05-12 18:01:32 +00006847 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006848
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006849#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00006850 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6851 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6852 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006853#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006854 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6855 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6856 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006857#endif
6858
Jonathan Peyton30419822017-05-12 18:01:32 +00006859 if (__kmp_version) {
6860 __kmp_print_version_2();
6861 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006862
Jonathan Peyton30419822017-05-12 18:01:32 +00006863 /* we have finished parallel initialization */
6864 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006865
Jonathan Peyton30419822017-05-12 18:01:32 +00006866 KMP_MB();
6867 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006868
Jonathan Peyton30419822017-05-12 18:01:32 +00006869 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006870}
6871
Jim Cownie5e8470a2013-09-27 10:38:44 +00006872/* ------------------------------------------------------------------------ */
6873
Jonathan Peyton30419822017-05-12 18:01:32 +00006874void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6875 kmp_team_t *team) {
6876 kmp_disp_t *dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006877
Jonathan Peyton30419822017-05-12 18:01:32 +00006878 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006879
Jonathan Peyton30419822017-05-12 18:01:32 +00006880 /* none of the threads have encountered any constructs, yet. */
6881 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006882#if KMP_CACHE_MANAGE
Jonathan Peyton30419822017-05-12 18:01:32 +00006883 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006884#endif /* KMP_CACHE_MANAGE */
Jonathan Peyton30419822017-05-12 18:01:32 +00006885 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6886 KMP_DEBUG_ASSERT(dispatch);
6887 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6888 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
6889 // this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006890
Jonathan Peyton30419822017-05-12 18:01:32 +00006891 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006892#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006893 dispatch->th_doacross_buf_idx =
6894 0; /* reset the doacross dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00006895#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006896 if (__kmp_env_consistency_check)
6897 __kmp_push_parallel(gtid, team->t.t_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006898
Jonathan Peyton30419822017-05-12 18:01:32 +00006899 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006900}
6901
Jonathan Peyton30419822017-05-12 18:01:32 +00006902void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6903 kmp_team_t *team) {
6904 if (__kmp_env_consistency_check)
6905 __kmp_pop_parallel(gtid, team->t.t_ident);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006906
Jonathan Peyton30419822017-05-12 18:01:32 +00006907 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006908}
6909
Jonathan Peyton30419822017-05-12 18:01:32 +00006910int __kmp_invoke_task_func(int gtid) {
6911 int rc;
6912 int tid = __kmp_tid_from_gtid(gtid);
6913 kmp_info_t *this_thr = __kmp_threads[gtid];
6914 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006915
Jonathan Peyton30419822017-05-12 18:01:32 +00006916 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006917#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006918 if (__itt_stack_caller_create_ptr) {
6919 __kmp_itt_stack_callee_enter(
6920 (__itt_caller)
6921 team->t.t_stack_id); // inform ittnotify about entering user's code
6922 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006923#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006924#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006925 SSC_MARK_INVOKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006926#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006927
6928#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006929 void *dummy;
6930 void **exit_runtime_p;
Joachim Protze82e94a52017-11-01 10:08:30 +00006931 ompt_data_t *my_task_data;
6932 ompt_data_t *my_parallel_data;
6933 int ompt_team_size;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006934
Joachim Protze82e94a52017-11-01 10:08:30 +00006935 if (ompt_enabled.enabled) {
Joachim Protzec255ca72017-11-05 14:11:10 +00006936 exit_runtime_p = &(
6937 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
Jonathan Peyton30419822017-05-12 18:01:32 +00006938 } else {
6939 exit_runtime_p = &dummy;
6940 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006941
Joachim Protze82e94a52017-11-01 10:08:30 +00006942 my_task_data =
6943 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
6944 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
6945 if (ompt_enabled.ompt_callback_implicit_task) {
6946 ompt_team_size = team->t.t_nproc;
6947 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
6948 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
6949 __kmp_tid_from_gtid(gtid));
Joachim Protze9be9cf22018-05-07 12:42:21 +00006950 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
Jonathan Peyton30419822017-05-12 18:01:32 +00006951 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006952#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006953
Jonathan Peyton30419822017-05-12 18:01:32 +00006954 {
6955 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6956 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6957 rc =
6958 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6959 tid, (int)team->t.t_argc, (void **)team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006960#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006961 ,
6962 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006963#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006964 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006965#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006966 *exit_runtime_p = NULL;
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006967#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006968 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006969
Jim Cownie5e8470a2013-09-27 10:38:44 +00006970#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006971 if (__itt_stack_caller_create_ptr) {
6972 __kmp_itt_stack_callee_leave(
6973 (__itt_caller)
6974 team->t.t_stack_id); // inform ittnotify about leaving user's code
6975 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006976#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00006977 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006978
Jonathan Peyton30419822017-05-12 18:01:32 +00006979 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006980}
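// A rough sketch of what the __kmp_invoke_microtask() call above amounts to
// conceptually (the real version forwards a variable argument count, typically
// in assembly); names below are illustrative only:
#if 0
typedef void (*sketch_microtask_t)(int *gtid, int *tid, ...);
static int sketch_invoke_microtask(sketch_microtask_t pkfn, int gtid, int tid,
                                   int argc, void **argv) {
  int g = gtid, t = tid;
  if (argc == 2) // shown for argc == 2; the real code handles any argc
    pkfn(&g, &t, argv[0], argv[1]);
  return 1;
}
#endif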
6981
6982#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006983void __kmp_teams_master(int gtid) {
6984 // This routine is called by all master threads in teams construct
6985 kmp_info_t *thr = __kmp_threads[gtid];
6986 kmp_team_t *team = thr->th.th_team;
6987 ident_t *loc = team->t.t_ident;
6988 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6989 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6990 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6991 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6992 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6993// Launch league of teams now, but not let workers execute
6994// (they hang on fork barrier until next parallel)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006995#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006996 SSC_MARK_FORKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006997#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006998 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
Jonathan Peyton30419822017-05-12 18:01:32 +00006999 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
7000 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007001#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00007002 SSC_MARK_JOINING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007003#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00007004
Jonathan Peyton30419822017-05-12 18:01:32 +00007005 // AC: last parameter "1" eliminates join barrier which won't work because
7006 // worker threads are in a fork barrier waiting for more parallel regions
7007 __kmp_join_call(loc, gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007008#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00007009 ,
7010 fork_context_intel
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00007011#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007012 ,
7013 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007014}
7015
Jonathan Peyton30419822017-05-12 18:01:32 +00007016int __kmp_invoke_teams_master(int gtid) {
7017 kmp_info_t *this_thr = __kmp_threads[gtid];
7018 kmp_team_t *team = this_thr->th.th_team;
7019#if KMP_DEBUG
7020 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7021 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7022 (void *)__kmp_teams_master);
7023#endif
7024 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7025 __kmp_teams_master(gtid);
7026 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7027 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007028}
7029#endif /* OMP_40_ENABLED */
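// Illustrative user code that reaches the teams path above (assumed lowering,
// not a compiler dump): the compiler outlines the construct, passes the clause
// values through __kmpc_push_num_teams() and forks with __kmp_teams_master()
// as the wrapped task.
#if 0
#include <omp.h>
void teams_example(void) {
#pragma omp teams num_teams(2) thread_limit(4)
#pragma omp parallel
  { /* a league of 2 teams, each limited to 4 threads */ }
}
#endif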
7030
7031/* this sets the requested number of threads for the next parallel region
Jonathan Peyton30419822017-05-12 18:01:32 +00007032   encountered by this team. Since this should be enclosed in the forkjoin
7033   critical section, it should avoid race conditions with asymmetrical nested
7034 parallelism */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007035
Jonathan Peyton30419822017-05-12 18:01:32 +00007036void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7037 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007038
Jonathan Peyton30419822017-05-12 18:01:32 +00007039 if (num_threads > 0)
7040 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007041}
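// Illustrative lowering (an assumption about typical compiler output, not a
// dump): "#pragma omp parallel num_threads(4)" becomes a call to the
// __kmpc_push_num_threads() entry point, which forwards here, immediately
// followed by the fork; the fork consumes and clears th_set_nproc.
#if 0
__kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
__kmpc_fork_call(&loc, /*argc=*/0, outlined_body); // outlined_body: hypothetical
#endif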
7042
7043#if OMP_40_ENABLED
7044
7045/* this sets the requested number of teams for the teams region and/or
Jonathan Peyton30419822017-05-12 18:01:32 +00007046 the number of threads for the next parallel region encountered */
7047void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7048 int num_threads) {
7049 kmp_info_t *thr = __kmp_threads[gtid];
7050 KMP_DEBUG_ASSERT(num_teams >= 0);
7051 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00007052
Jonathan Peyton30419822017-05-12 18:01:32 +00007053 if (num_teams == 0)
7054 num_teams = 1; // default number of teams is 1.
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007055  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
Jonathan Peyton30419822017-05-12 18:01:32 +00007056 if (!__kmp_reserve_warn) {
7057 __kmp_reserve_warn = 1;
7058 __kmp_msg(kmp_ms_warning,
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007059 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
Jonathan Peyton30419822017-05-12 18:01:32 +00007060 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007061 }
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007062 num_teams = __kmp_teams_max_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007063 }
7064 // Set number of teams (number of threads in the outer "parallel" of the
7065 // teams)
7066 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007067
Jonathan Peyton30419822017-05-12 18:01:32 +00007068 // Remember the number of threads for inner parallel regions
7069 if (num_threads == 0) {
7070 if (!TCR_4(__kmp_init_middle))
7071 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
7072 num_threads = __kmp_avail_proc / num_teams;
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007073 if (num_teams * num_threads > __kmp_teams_max_nth) {
Jonathan Peyton30419822017-05-12 18:01:32 +00007074      // adjust num_threads w/o warning as it is not a user setting
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007075 num_threads = __kmp_teams_max_nth / num_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007076 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007077 } else {
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007078 if (num_teams * num_threads > __kmp_teams_max_nth) {
7079 int new_threads = __kmp_teams_max_nth / num_teams;
Jonathan Peyton30419822017-05-12 18:01:32 +00007080 if (!__kmp_reserve_warn) { // user asked for too many threads
Jonathan Peyton4f90c822017-08-02 20:04:45 +00007081 __kmp_reserve_warn = 1; // that conflicts with KMP_TEAMS_THREAD_LIMIT
Jonathan Peyton30419822017-05-12 18:01:32 +00007082 __kmp_msg(kmp_ms_warning,
7083 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7084 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7085 }
7086 num_threads = new_threads;
7087 }
7088 }
7089 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007090}
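// Illustrative walk-through (hypothetical numbers): with __kmp_teams_max_nth ==
// 64 and __kmp_avail_proc == 32, "num_teams(8)" with no thread_limit gives
// num_threads = 32 / 8 = 4; "num_teams(8) thread_limit(16)" would need
// 8 * 16 = 128 > 64, so num_threads is cut back to 64 / 8 = 8 and a
// CantFormThrTeam warning is issued once.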
7091
Jim Cownie5e8470a2013-09-27 10:38:44 +00007092// Set the proc_bind var to use in the following parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007093void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7094 kmp_info_t *thr = __kmp_threads[gtid];
7095 thr->th.th_set_proc_bind = proc_bind;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007096}
7097
7098#endif /* OMP_40_ENABLED */
7099
7100/* Launch the worker threads into the microtask. */
7101
Jonathan Peyton30419822017-05-12 18:01:32 +00007102void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7103 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007104
7105#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007106 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007107#endif /* KMP_DEBUG */
7108
Jonathan Peyton30419822017-05-12 18:01:32 +00007109 KMP_DEBUG_ASSERT(team);
7110 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7111 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7112 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007113
Jonathan Peyton30419822017-05-12 18:01:32 +00007114 team->t.t_construct = 0; /* no single directives seen yet */
7115 team->t.t_ordered.dt.t_value =
7116 0; /* thread 0 enters the ordered section first */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007117
Jonathan Peyton30419822017-05-12 18:01:32 +00007118 /* Reset the identifiers on the dispatch buffer */
7119 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7120 if (team->t.t_max_nproc > 1) {
7121 int i;
7122 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7123 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00007124#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007125 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00007126#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007127 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007128 } else {
7129 team->t.t_disp_buffer[0].buffer_index = 0;
7130#if OMP_45_ENABLED
7131 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7132#endif
7133 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007134
Jonathan Peyton30419822017-05-12 18:01:32 +00007135 KMP_MB(); /* Flush all pending memory write invalidates. */
7136 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007137
7138#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007139 for (f = 0; f < team->t.t_nproc; f++) {
7140 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7141 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7142 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007143#endif /* KMP_DEBUG */
7144
Jonathan Peyton30419822017-05-12 18:01:32 +00007145 /* release the worker threads so they may begin working */
7146 __kmp_fork_barrier(gtid, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007147}
7148
Jonathan Peyton30419822017-05-12 18:01:32 +00007149void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7150 kmp_info_t *this_thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00007151
Jonathan Peyton30419822017-05-12 18:01:32 +00007152 KMP_DEBUG_ASSERT(team);
7153 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7154 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7155 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007156
Jonathan Peyton30419822017-05-12 18:01:32 +00007157/* Join barrier after fork */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007158
7159#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00007160 if (__kmp_threads[gtid] &&
7161 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7162 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7163 __kmp_threads[gtid]);
7164 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7165 "team->t.t_nproc=%d\n",
7166 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7167 team->t.t_nproc);
7168 __kmp_print_structure();
7169 }
7170 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7171 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007172#endif /* KMP_DEBUG */
7173
Jonathan Peyton30419822017-05-12 18:01:32 +00007174 __kmp_join_barrier(gtid); /* wait for everyone */
Joachim Protze82e94a52017-11-01 10:08:30 +00007175#if OMPT_SUPPORT
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007176 if (ompt_enabled.enabled &&
7177 this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
7178 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7179 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007180 this_thr->th.ompt_thread_info.state = omp_state_overhead;
7181#if OMPT_OPTIONAL
7182 void *codeptr = NULL;
7183 if (KMP_MASTER_TID(ds_tid) &&
7184 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7185 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7186 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7187
7188 if (ompt_enabled.ompt_callback_sync_region_wait) {
7189 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007190 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007191 }
7192 if (ompt_enabled.ompt_callback_sync_region) {
7193 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007194 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
Joachim Protze82e94a52017-11-01 10:08:30 +00007195 }
7196#endif
7197 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7198 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
Jonas Hahnfeld82768d02018-02-23 16:46:25 +00007199 ompt_scope_end, NULL, task_data, 0, ds_tid);
Joachim Protze82e94a52017-11-01 10:08:30 +00007200 }
Joachim Protze82e94a52017-11-01 10:08:30 +00007201 }
7202#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007203
Jonathan Peyton30419822017-05-12 18:01:32 +00007204 KMP_MB(); /* Flush all pending memory write invalidates. */
7205 KMP_ASSERT(this_thr->th.th_team == team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007206}
7207
Jim Cownie5e8470a2013-09-27 10:38:44 +00007208/* ------------------------------------------------------------------------ */
7209
7210#ifdef USE_LOAD_BALANCE
7211
Jim Cownie5e8470a2013-09-27 10:38:44 +00007212// Return the number of worker threads actively spinning in the hot team, if we
7213// are at the outermost level of parallelism. Otherwise, return 0.
Jonathan Peyton30419822017-05-12 18:01:32 +00007214static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7215 int i;
7216 int retval;
7217 kmp_team_t *hot_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007218
Jonathan Peyton30419822017-05-12 18:01:32 +00007219 if (root->r.r_active) {
7220 return 0;
7221 }
7222 hot_team = root->r.r_hot_team;
7223 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7224 return hot_team->t.t_nproc - 1; // Don't count master thread
7225 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007226
Jonathan Peyton30419822017-05-12 18:01:32 +00007227 // Skip the master thread - it is accounted for elsewhere.
7228 retval = 0;
7229 for (i = 1; i < hot_team->t.t_nproc; i++) {
7230 if (hot_team->t.t_threads[i]->th.th_active) {
7231 retval++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007232 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007233 }
7234 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007235}
7236
Jim Cownie5e8470a2013-09-27 10:38:44 +00007237// Perform an automatic adjustment to the number of
7238// threads used by the next parallel region.
Jonathan Peyton30419822017-05-12 18:01:32 +00007239static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7240 int retval;
7241 int pool_active;
7242 int hot_team_active;
7243 int team_curr_active;
7244 int system_active;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007245
Jonathan Peyton30419822017-05-12 18:01:32 +00007246 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7247 set_nproc));
7248 KMP_DEBUG_ASSERT(root);
7249 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7250 ->th.th_current_task->td_icvs.dynamic == TRUE);
7251 KMP_DEBUG_ASSERT(set_nproc > 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007252
Jonathan Peyton30419822017-05-12 18:01:32 +00007253 if (set_nproc == 1) {
7254 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
7255 return 1;
7256 }
7257
7258 // Threads that are active in the thread pool, active in the hot team for this
7259 // particular root (if we are at the outer par level), and the currently
7260 // executing thread (to become the master) are available to add to the new
7261 // team, but are currently contributing to the system load, and must be
7262 // accounted for.
Jonathan Peyton37e2ef52018-07-09 17:36:22 +00007263 pool_active = __kmp_thread_pool_active_nth;
Jonathan Peyton30419822017-05-12 18:01:32 +00007264 hot_team_active = __kmp_active_hot_team_nproc(root);
7265 team_curr_active = pool_active + hot_team_active + 1;
7266
7267 // Check the system load.
7268 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7269 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
7270 "hot team active = %d\n",
7271 system_active, pool_active, hot_team_active));
7272
7273 if (system_active < 0) {
7274 // There was an error reading the necessary info from /proc, so use the
7275 // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
7276 // = dynamic_thread_limit, we shouldn't wind up getting back here.
7277 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7278 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7279
7280 // Make this call behave like the thread limit algorithm.
7281 retval = __kmp_avail_proc - __kmp_nth +
7282 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7283 if (retval > set_nproc) {
7284 retval = set_nproc;
7285 }
7286 if (retval < KMP_MIN_NTH) {
7287 retval = KMP_MIN_NTH;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007288 }
7289
Jonathan Peyton30419822017-05-12 18:01:32 +00007290 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7291 retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007292 return retval;
Jonathan Peyton30419822017-05-12 18:01:32 +00007293 }
7294
7295 // There is a slight delay in the load balance algorithm in detecting new
7296 // running procs. The real system load at this instant should be at least as
7297 // large as the #active omp thread that are available to add to the team.
7298 if (system_active < team_curr_active) {
7299 system_active = team_curr_active;
7300 }
7301 retval = __kmp_avail_proc - system_active + team_curr_active;
7302 if (retval > set_nproc) {
7303 retval = set_nproc;
7304 }
7305 if (retval < KMP_MIN_NTH) {
7306 retval = KMP_MIN_NTH;
7307 }
7308
7309 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7310 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007311} // __kmp_load_balance_nproc()
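// Illustrative walk-through (hypothetical numbers): with __kmp_avail_proc == 16,
// 3 threads still spinning in the thread pool, 4 active workers in the hot team
// and the forking thread itself, team_curr_active == 3 + 4 + 1 == 8. If
// __kmp_get_load_balance() reports 10 running threads system-wide, the proposed
// team size is 16 - 10 + 8 == 14, which is then clamped to set_nproc above and
// KMP_MIN_NTH below.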
7312
7313#endif /* USE_LOAD_BALANCE */
7314
Jim Cownie5e8470a2013-09-27 10:38:44 +00007315/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007316
7317/* NOTE: this is called with the __kmp_init_lock held */
Jonathan Peyton30419822017-05-12 18:01:32 +00007318void __kmp_cleanup(void) {
7319 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007320
Jonathan Peyton30419822017-05-12 18:01:32 +00007321 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007322
Jonathan Peyton30419822017-05-12 18:01:32 +00007323 if (TCR_4(__kmp_init_parallel)) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007324#if KMP_HANDLE_SIGNALS
Jonathan Peyton30419822017-05-12 18:01:32 +00007325 __kmp_remove_signals();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007326#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007327 TCW_4(__kmp_init_parallel, FALSE);
7328 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007329
Jonathan Peyton30419822017-05-12 18:01:32 +00007330 if (TCR_4(__kmp_init_middle)) {
Alp Toker763b9392014-02-28 09:42:41 +00007331#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007332 __kmp_affinity_uninitialize();
Alp Toker763b9392014-02-28 09:42:41 +00007333#endif /* KMP_AFFINITY_SUPPORTED */
Jonathan Peyton30419822017-05-12 18:01:32 +00007334 __kmp_cleanup_hierarchy();
7335 TCW_4(__kmp_init_middle, FALSE);
7336 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007337
Jonathan Peyton30419822017-05-12 18:01:32 +00007338 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007339
Jonathan Peyton30419822017-05-12 18:01:32 +00007340 if (__kmp_init_serial) {
7341 __kmp_runtime_destroy();
7342 __kmp_init_serial = FALSE;
7343 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007344
Andrey Churbanov9e9333a2018-03-05 18:42:01 +00007345 __kmp_cleanup_threadprivate_caches();
7346
Jonathan Peyton30419822017-05-12 18:01:32 +00007347 for (f = 0; f < __kmp_threads_capacity; f++) {
7348 if (__kmp_root[f] != NULL) {
7349 __kmp_free(__kmp_root[f]);
7350 __kmp_root[f] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007351 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007352 }
7353 __kmp_free(__kmp_threads);
7354 // __kmp_threads and __kmp_root were allocated at once, as single block, so
7355 // there is no need in freeing __kmp_root.
7356 __kmp_threads = NULL;
7357 __kmp_root = NULL;
7358 __kmp_threads_capacity = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007359
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007360#if KMP_USE_DYNAMIC_LOCK
Jonathan Peyton30419822017-05-12 18:01:32 +00007361 __kmp_cleanup_indirect_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007362#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007363 __kmp_cleanup_user_locks();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00007364#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007365
Jonathan Peyton30419822017-05-12 18:01:32 +00007366#if KMP_AFFINITY_SUPPORTED
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00007367 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
Jonathan Peyton30419822017-05-12 18:01:32 +00007368 __kmp_cpuinfo_file = NULL;
7369#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007370
Jonathan Peyton30419822017-05-12 18:01:32 +00007371#if KMP_USE_ADAPTIVE_LOCKS
7372#if KMP_DEBUG_ADAPTIVE_LOCKS
7373 __kmp_print_speculative_stats();
7374#endif
7375#endif
7376 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7377 __kmp_nested_nth.nth = NULL;
7378 __kmp_nested_nth.size = 0;
7379 __kmp_nested_nth.used = 0;
7380 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7381 __kmp_nested_proc_bind.bind_types = NULL;
7382 __kmp_nested_proc_bind.size = 0;
7383 __kmp_nested_proc_bind.used = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007384
Jonathan Peyton30419822017-05-12 18:01:32 +00007385 __kmp_i18n_catclose();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007386
Jonathan Peytonf6399362018-07-09 17:51:13 +00007387#if KMP_USE_HIER_SCHED
7388 __kmp_hier_scheds.deallocate();
7389#endif
7390
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007391#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007392 __kmp_stats_fini();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00007393#endif
7394
Jonathan Peyton30419822017-05-12 18:01:32 +00007395 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007396}
7397
7398/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007399
7400int __kmp_ignore_mppbeg(void) {
7401 char *env;
7402
7403 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
7404 if (__kmp_str_match_false(env))
7405 return FALSE;
7406 }
7407 // By default __kmpc_begin() is no-op.
7408 return TRUE;
7409}
7410
7411int __kmp_ignore_mppend(void) {
7412 char *env;
7413
7414 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
7415 if (__kmp_str_match_false(env))
7416 return FALSE;
7417 }
7418 // By default __kmpc_end() is no-op.
7419 return TRUE;
7420}
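// Illustrative usage: by default __kmpc_begin()/__kmpc_end() are no-ops; per
// the checks above, setting the variables to a "false" value re-enables them,
// e.g. "KMP_IGNORE_MPPBEG=false KMP_IGNORE_MPPEND=false ./a.out".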
7421
7422void __kmp_internal_begin(void) {
7423 int gtid;
7424 kmp_root_t *root;
7425
7426 /* this is a very important step as it will register new sibling threads
7427 and assign these new uber threads a new gtid */
7428 gtid = __kmp_entry_gtid();
7429 root = __kmp_threads[gtid]->th.th_root;
7430 KMP_ASSERT(KMP_UBER_GTID(gtid));
7431
7432 if (root->r.r_begin)
7433 return;
7434 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7435 if (root->r.r_begin) {
7436 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7437 return;
7438 }
7439
7440 root->r.r_begin = TRUE;
7441
7442 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7443}
7444
Jim Cownie5e8470a2013-09-27 10:38:44 +00007445/* ------------------------------------------------------------------------ */
7446
Jonathan Peyton30419822017-05-12 18:01:32 +00007447void __kmp_user_set_library(enum library_type arg) {
7448 int gtid;
7449 kmp_root_t *root;
7450 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007451
Jonathan Peyton30419822017-05-12 18:01:32 +00007452 /* first, make sure we are initialized so we can get our gtid */
7453
7454 gtid = __kmp_entry_gtid();
7455 thread = __kmp_threads[gtid];
7456
7457 root = thread->th.th_root;
7458
7459 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7460 library_serial));
7461 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
7462 thread */
7463 KMP_WARNING(SetLibraryIncorrectCall);
7464 return;
7465 }
7466
7467 switch (arg) {
7468 case library_serial:
7469 thread->th.th_set_nproc = 0;
7470 set__nproc(thread, 1);
7471 break;
7472 case library_turnaround:
7473 thread->th.th_set_nproc = 0;
7474 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7475 : __kmp_dflt_team_nth_ub);
7476 break;
7477 case library_throughput:
7478 thread->th.th_set_nproc = 0;
7479 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7480 : __kmp_dflt_team_nth_ub);
7481 break;
7482 default:
7483 KMP_FATAL(UnknownLibraryType, arg);
7484 }
7485
7486 __kmp_aux_set_library(arg);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007487}
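// Illustrative user-side sketch (names assumed from the public kmp_* API in
// omp.h rather than anything in this file):
#if 0
#include <omp.h>
int main(void) {
  kmp_set_library_throughput(); // same effect as KMP_LIBRARY=throughput
#pragma omp parallel
  { /* with the throughput policy, idle workers eventually go to sleep */ }
  return 0;
}
#endif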
7488
Jonathan Peyton30419822017-05-12 18:01:32 +00007489void __kmp_aux_set_stacksize(size_t arg) {
7490 if (!__kmp_init_serial)
7491 __kmp_serial_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00007492
7493#if KMP_OS_DARWIN
Jonathan Peyton30419822017-05-12 18:01:32 +00007494 if (arg & (0x1000 - 1)) {
7495 arg &= ~(0x1000 - 1);
7496 if (arg + 0x1000) /* check for overflow if we round up */
7497 arg += 0x1000;
7498 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007499#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007500 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007501
Jonathan Peyton30419822017-05-12 18:01:32 +00007502 /* only change the default stacksize before the first parallel region */
7503 if (!TCR_4(__kmp_init_parallel)) {
7504 size_t value = arg; /* argument is in bytes */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007505
Jonathan Peyton30419822017-05-12 18:01:32 +00007506 if (value < __kmp_sys_min_stksize)
7507 value = __kmp_sys_min_stksize;
7508 else if (value > KMP_MAX_STKSIZE)
7509 value = KMP_MAX_STKSIZE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007510
Jonathan Peyton30419822017-05-12 18:01:32 +00007511 __kmp_stksize = value;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007512
Jonathan Peyton30419822017-05-12 18:01:32 +00007513 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
7514 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007515
Jonathan Peyton30419822017-05-12 18:01:32 +00007516 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007517}
7518
7519/* set the behaviour of the runtime library */
7520/* TODO this can cause some odd behaviour with sibling parallelism... */
Jonathan Peyton30419822017-05-12 18:01:32 +00007521void __kmp_aux_set_library(enum library_type arg) {
7522 __kmp_library = arg;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007523
Jonathan Peyton30419822017-05-12 18:01:32 +00007524 switch (__kmp_library) {
7525 case library_serial: {
7526 KMP_INFORM(LibraryIsSerial);
7527 (void)__kmp_change_library(TRUE);
7528 } break;
7529 case library_turnaround:
7530 (void)__kmp_change_library(TRUE);
7531 break;
7532 case library_throughput:
7533 (void)__kmp_change_library(FALSE);
7534 break;
7535 default:
7536 KMP_FATAL(UnknownLibraryType, arg);
7537 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007538}
7539
7540/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007541
Jonathan Peyton30419822017-05-12 18:01:32 +00007542void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
7543 int blocktime = arg; /* argument is in milliseconds */
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007544#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007545 int bt_intervals;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007546#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007547 int bt_set;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007548
Jonathan Peyton30419822017-05-12 18:01:32 +00007549 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007550
Jonathan Peyton30419822017-05-12 18:01:32 +00007551 /* Normalize and set blocktime for the teams */
7552 if (blocktime < KMP_MIN_BLOCKTIME)
7553 blocktime = KMP_MIN_BLOCKTIME;
7554 else if (blocktime > KMP_MAX_BLOCKTIME)
7555 blocktime = KMP_MAX_BLOCKTIME;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007556
Jonathan Peyton30419822017-05-12 18:01:32 +00007557 set__blocktime_team(thread->th.th_team, tid, blocktime);
7558 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007559
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007560#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007561 /* Calculate and set blocktime intervals for the teams */
7562 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007563
Jonathan Peyton30419822017-05-12 18:01:32 +00007564 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
7565 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007566#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007567
Jonathan Peyton30419822017-05-12 18:01:32 +00007568 /* Set whether blocktime has been set to "TRUE" */
7569 bt_set = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007570
Jonathan Peyton30419822017-05-12 18:01:32 +00007571 set__bt_set_team(thread->th.th_team, tid, bt_set);
7572 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007573#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00007574 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
7575 "bt_intervals=%d, monitor_updates=%d\n",
7576 __kmp_gtid_from_tid(tid, thread->th.th_team),
7577 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
7578 __kmp_monitor_wakeups));
Samuel Antao33515192016-10-20 13:20:17 +00007579#else
Jonathan Peyton30419822017-05-12 18:01:32 +00007580 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
7581 __kmp_gtid_from_tid(tid, thread->th.th_team),
7582 thread->th.th_team->t.t_id, tid, blocktime));
Jonathan Peytone1c7c132016-10-07 18:12:19 +00007583#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00007584}
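// Illustrative user-side sketch (kmp_set_blocktime() is the public wrapper
// assumed to reach __kmp_aux_set_blocktime() for the calling thread):
#if 0
#include <omp.h>
void blocktime_example(void) {
  kmp_set_blocktime(0); // workers sleep right after a region; akin to KMP_BLOCKTIME=0
}
#endif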
7585
Jonathan Peyton30419822017-05-12 18:01:32 +00007586void __kmp_aux_set_defaults(char const *str, int len) {
7587 if (!__kmp_init_serial) {
7588 __kmp_serial_initialize();
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00007589 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007590 __kmp_env_initialize(str);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007591
Jonathan Peyton30419822017-05-12 18:01:32 +00007592 if (__kmp_settings
Jim Cownie5e8470a2013-09-27 10:38:44 +00007593#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007594 || __kmp_display_env || __kmp_display_env_verbose
Jim Cownie5e8470a2013-09-27 10:38:44 +00007595#endif // OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00007596 ) {
7597 __kmp_env_print();
7598 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007599} // __kmp_aux_set_defaults
7600
7601/* ------------------------------------------------------------------------ */
Jonathan Peyton30419822017-05-12 18:01:32 +00007602/* internal fast reduction routines */
Jim Cownie5e8470a2013-09-27 10:38:44 +00007603
Jim Cownie5e8470a2013-09-27 10:38:44 +00007604PACKED_REDUCTION_METHOD_T
Jonathan Peyton30419822017-05-12 18:01:32 +00007605__kmp_determine_reduction_method(
7606 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
7607 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7608 kmp_critical_name *lck) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007609
Jonathan Peyton30419822017-05-12 18:01:32 +00007610 // Default reduction method: critical construct ( lck != NULL, like in current
7611 // PAROPT )
7612 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
7613 // can be selected by RTL
7614 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
7615 // can be selected by RTL
7616 // Finally, it's up to OpenMP RTL to make a decision on which method to select
7617 // among generated by PAROPT.
Jim Cownie5e8470a2013-09-27 10:38:44 +00007618
Jonathan Peyton30419822017-05-12 18:01:32 +00007619 PACKED_REDUCTION_METHOD_T retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007620
Jonathan Peyton30419822017-05-12 18:01:32 +00007621 int team_size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007622
Jonathan Peyton30419822017-05-12 18:01:32 +00007623 KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
7624 KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
Jim Cownie5e8470a2013-09-27 10:38:44 +00007625
Jonathan Peyton30419822017-05-12 18:01:32 +00007626#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
7627 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
7628#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
Jim Cownie5e8470a2013-09-27 10:38:44 +00007629
Jonathan Peyton30419822017-05-12 18:01:32 +00007630 retval = critical_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007631
Jonathan Peyton30419822017-05-12 18:01:32 +00007632  // another way of getting the team size (with one dynamic dereference) is slower
7633 team_size = __kmp_get_team_num_threads(global_tid);
7634 if (team_size == 1) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007635
Jonathan Peyton30419822017-05-12 18:01:32 +00007636 retval = empty_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007637
Jonathan Peyton30419822017-05-12 18:01:32 +00007638 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00007639
Jonathan Peyton30419822017-05-12 18:01:32 +00007640 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7641 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007642
Jonathan Peyton30419822017-05-12 18:01:32 +00007643#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
Jim Cownie5e8470a2013-09-27 10:38:44 +00007644
Jonathan Peyton30419822017-05-12 18:01:32 +00007645#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \
7646 KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007647
Jonathan Peyton30419822017-05-12 18:01:32 +00007648 int teamsize_cutoff = 4;
Jonathan Peyton91b78702015-06-08 19:39:07 +00007649
Jonathan Peyton492e0a32017-06-13 17:17:26 +00007650#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00007651 if (__kmp_mic_type != non_mic) {
7652 teamsize_cutoff = 8;
7653 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00007654#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00007655 if (tree_available) {
7656 if (team_size <= teamsize_cutoff) {
7657 if (atomic_available) {
7658 retval = atomic_reduce_block;
Jim Cownie5e8470a2013-09-27 10:38:44 +00007659 }
Jonathan Peyton30419822017-05-12 18:01:32 +00007660 } else {
7661 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7662 }
7663 } else if (atomic_available) {
7664 retval = atomic_reduce_block;
7665 }
7666#else
7667#error "Unknown or unsupported OS"
7668#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
7669// KMP_OS_DARWIN
Jim Cownie5e8470a2013-09-27 10:38:44 +00007670
Jonathan Peyton30419822017-05-12 18:01:32 +00007671#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7672
7673#if KMP_OS_LINUX || KMP_OS_WINDOWS
7674
7675 // basic tuning
7676
7677 if (atomic_available) {
7678 if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
7679 retval = atomic_reduce_block;
7680 }
7681 } // otherwise: use critical section
7682
7683#elif KMP_OS_DARWIN
7684
7685 if (atomic_available && (num_vars <= 3)) {
7686 retval = atomic_reduce_block;
7687 } else if (tree_available) {
7688 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
7689 (reduce_size < (2000 * sizeof(kmp_real64)))) {
7690 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7691 }
7692 } // otherwise: use critical section
7693
7694#else
7695#error "Unknown or unsupported OS"
7696#endif
7697
7698#else
7699#error "Unknown or unsupported architecture"
7700#endif
7701 }
7702
7703 // KMP_FORCE_REDUCTION
7704
7705 // If the team is serialized (team_size == 1), ignore the forced reduction
7706 // method and stay with the unsynchronized method (empty_reduce_block)
7707 if (__kmp_force_reduction_method != reduction_method_not_defined &&
7708 team_size != 1) {
7709
7710 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7711
7712 int atomic_available, tree_available;
7713
7714 switch ((forced_retval = __kmp_force_reduction_method)) {
7715 case critical_reduce_block:
7716 KMP_ASSERT(lck); // lck should be != 0
7717 break;
7718
7719 case atomic_reduce_block:
7720 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7721 if (!atomic_available) {
7722 KMP_WARNING(RedMethodNotSupported, "atomic");
7723 forced_retval = critical_reduce_block;
7724 }
7725 break;
7726
7727 case tree_reduce_block:
7728 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7729 if (!tree_available) {
7730 KMP_WARNING(RedMethodNotSupported, "tree");
7731 forced_retval = critical_reduce_block;
7732 } else {
7733#if KMP_FAST_REDUCTION_BARRIER
7734 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7735#endif
7736 }
7737 break;
7738
7739 default:
7740 KMP_ASSERT(0); // "unsupported method specified"
Jim Cownie5e8470a2013-09-27 10:38:44 +00007741 }
7742
Jonathan Peyton30419822017-05-12 18:01:32 +00007743 retval = forced_retval;
7744 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00007745
Jonathan Peyton30419822017-05-12 18:01:32 +00007746 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
Jim Cownie5e8470a2013-09-27 10:38:44 +00007747
Jonathan Peyton30419822017-05-12 18:01:32 +00007748#undef FAST_REDUCTION_TREE_METHOD_GENERATED
7749#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7750
7751 return (retval);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007752}
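// Illustrative path into this selector (assumed lowering, not a compiler dump):
// a user reduction such as
//   #pragma omp parallel for reduction(+ : sum)
//   for (int i = 0; i < n; ++i) sum += a[i];
// reaches here via __kmpc_reduce()/__kmpc_reduce_nowait(), which supply the
// loc flags (KMP_IDENT_ATOMIC_REDUCE), the per-thread reduce_data and the
// compiler-generated reduce_func consulted above. For testing, the choice can
// be forced with the KMP_FORCE_REDUCTION environment variable (critical,
// atomic or tree), which sets __kmp_force_reduction_method.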
7753
7754// this function is for testing set/get/determine reduce method
Jonathan Peyton30419822017-05-12 18:01:32 +00007755kmp_int32 __kmp_get_reduce_method(void) {
7756 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
Jim Cownie5e8470a2013-09-27 10:38:44 +00007757}