/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
                                                        "5.0 (201611)";
#elif OMP_45_ENABLED
                                                        "4.5 (201511)";
#elif OMP_40_ENABLED
                                                        "4.0 (201307)";
#else
                                                        "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
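/* Three lookup strategies, selected by __kmp_gtid_mode (see the code below):
     >= 3 : the gtid lives in a thread-local (TDATA) variable -- fastest;
     >= 2 : the gtid lives in OS thread-specific storage (keyed TLS);
     else : locate the thread by searching the registered thread stacks for
            the one that contains the address of a local variable. */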
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

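/* Registering variant of the lookup above: if the calling thread has no gtid
   yet, it is treated as a new root -- serial initialization is performed if
   necessary and the thread is registered via __kmp_register_root(). */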
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_msg(kmp_ms_fatal, KMP_MSG(StackOverlap),
                    KMP_HNT(ChangeStackLimit), __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

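/* Emits a single line of the form
     OMP storage map: <p1> <p2> <size> <description>
   for example (illustrative addresses only):
     OMP storage map: 0x2b3f4000 0x2b3f5fff    8192 th_0 stack (initial)
   and, when KMP_PRINT_DATA_PLACEMENT is enabled, also reports the host node
   of each page in the range. */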
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }; // if

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS by default abort() causes pop-up error box, which stalls
       nightly testing. Unfortunately, we cannot reliably suppress pop-up error
       boxes. _set_abort_behavior() works well, but this function is not
       available in VS7 (this is not a problem for DLL, but it is a problem for
       static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
       help, at least in some versions of MS C RTL.

       It seems the following sequence is the only way to simulate abort() and
       avoid pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }; // if

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). The OS terminates other threads (except
  // the one calling ProcessExit or FreeLibrary). So, it might be safe to
  // access the __kmp_threads[] without taking the forkjoin_lock. However, in
  // fact, some threads can still be alive here, although they are about to be
  // terminated. The threads in the array with ds_thread==0 are most
  // suspicious. Actually, it may not be safe to access the __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      //   lpReserved == NULL when FreeLibrary() was called,
      //   lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear, triggering the problem of an unreleased
      // forkjoin lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like this is not a corner
      // case, but rather a set of common ones:
      // - the main function was compiled by an alternative compiler;
      // - the main function was compiled by icl but without /Qopenmp
      //   (application with plugins);
      // - application terminates by calling C exit(), Fortran CALL EXIT() or
      //   Fortran STOP.
      // - an alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
  // KMP_LIBRARY=throughput
}

/* __kmp_parallel_deo -- Wait until it's our turn. */
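/* The ordered section is implemented as a token passed around the team:
   team->t.t_ordered.dt.t_value holds the tid whose turn it is. deo spins
   (KMP_WAIT_YIELD) until that value equals this thread's tid; dxo then
   advances it to (tid + 1) % nproc, releasing the next thread. */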
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
      /* accept blame for "ordered" waiting */
      kmp_info_t *this_thread = __kmp_threads[gtid];
      ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
          this_thread->th.ompt_thread_info.wait_id);
    }
#endif

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

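/* Illustrative only -- a compiler-generated SINGLE construct is expected to
   use this pair roughly as follows (the real entry points live in the
   __kmpc_* compatibility layer, e.g. __kmpc_single / __kmpc_end_single):

     if (__kmp_enter_single(gtid, loc, TRUE)) {
       // body of the single region, executed by exactly one thread
       __kmp_exit_single(gtid);
     }
     // the closing barrier, if any, is emitted separately
*/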
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 1 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
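/* Order of adjustments applied to the requested thread count:
   1. if dyn-var is set, shrink it per the dynamic mode (load balance,
      thread limit, or random);
   2. clamp to KMP_ALL_THREADS / KMP_MAX_THREADS / OMP_THREAD_LIMIT;
   3. clamp to the capacity of the __kmp_threads array, expanding the array
      if possible. A return value of 1 means serialize. */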
814static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
815 int master_tid, int set_nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +0000816#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000817 ,
818 int enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +0000819#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +0000820 ) {
821 int capacity;
822 int new_nthreads;
823 KMP_DEBUG_ASSERT(__kmp_init_serial);
824 KMP_DEBUG_ASSERT(root && parent_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000825
Jonathan Peyton30419822017-05-12 18:01:32 +0000826 // If dyn-var is set, dynamically adjust the number of desired threads,
827 // according to the method specified by dynamic_mode.
828 new_nthreads = set_nthreads;
829 if (!get__dynamic_2(parent_team, master_tid)) {
830 ;
831 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000832#ifdef USE_LOAD_BALANCE
Jonathan Peyton30419822017-05-12 18:01:32 +0000833 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
834 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
835 if (new_nthreads == 1) {
836 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
837 "reservation to 1 thread\n",
838 master_tid));
839 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000840 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000841 if (new_nthreads < set_nthreads) {
842 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
843 "reservation to %d threads\n",
844 master_tid, new_nthreads));
845 }
846 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000847#endif /* USE_LOAD_BALANCE */
Jonathan Peyton30419822017-05-12 18:01:32 +0000848 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
849 new_nthreads = __kmp_avail_proc - __kmp_nth +
850 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
851 if (new_nthreads <= 1) {
852 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
853 "reservation to 1 thread\n",
854 master_tid));
855 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000856 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000857 if (new_nthreads < set_nthreads) {
858 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
859 "reservation to %d threads\n",
860 master_tid, new_nthreads));
861 } else {
862 new_nthreads = set_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000863 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000864 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
865 if (set_nthreads > 2) {
866 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
867 new_nthreads = (new_nthreads % set_nthreads) + 1;
868 if (new_nthreads == 1) {
869 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
870 "reservation to 1 thread\n",
871 master_tid));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000872 return 1;
Jonathan Peyton30419822017-05-12 18:01:32 +0000873 }
874 if (new_nthreads < set_nthreads) {
875 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
876 "reservation to %d threads\n",
877 master_tid, new_nthreads));
878 }
879 }
880 } else {
881 KMP_ASSERT(0);
882 }
883
884 // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
885 if (__kmp_nth + new_nthreads -
886 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
887 __kmp_max_nth) {
888 int tl_nthreads = __kmp_max_nth - __kmp_nth +
889 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
890 if (tl_nthreads <= 0) {
891 tl_nthreads = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000892 }
893
Jonathan Peyton30419822017-05-12 18:01:32 +0000894 // If dyn-var is false, emit a 1-time warning.
895 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
896 __kmp_reserve_warn = 1;
897 __kmp_msg(kmp_ms_warning,
898 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
899 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
900 }
901 if (tl_nthreads == 1) {
902 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
903 "reservation to 1 thread\n",
904 master_tid));
905 return 1;
906 }
907 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
908 "reservation to %d threads\n",
909 master_tid, tl_nthreads));
910 new_nthreads = tl_nthreads;
911 }
912
913 // Check if the threads array is large enough, or needs expanding.
Jonathan Peyton30419822017-05-12 18:01:32 +0000914 // See comment in __kmp_register_root() about the adjustment if
915 // __kmp_threads[0] == NULL.
916 capacity = __kmp_threads_capacity;
917 if (TCR_PTR(__kmp_threads[0]) == NULL) {
918 --capacity;
919 }
920 if (__kmp_nth + new_nthreads -
921 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
922 capacity) {
923 // Expand the threads array.
924 int slotsRequired = __kmp_nth + new_nthreads -
925 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
926 capacity;
927 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
928 if (slotsAdded < slotsRequired) {
929 // The threads array was not expanded enough.
930 new_nthreads -= (slotsRequired - slotsAdded);
931 KMP_ASSERT(new_nthreads >= 1);
932
933 // If dyn-var is false, emit a 1-time warning.
934 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
935 __kmp_reserve_warn = 1;
936 if (__kmp_tp_cached) {
937 __kmp_msg(kmp_ms_warning,
938 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
939 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
940 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
941 } else {
942 __kmp_msg(kmp_ms_warning,
943 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
944 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
945 }
946 }
947 }
948 }
949
Jonathan Peyton642688b2017-06-01 16:46:36 +0000950#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +0000951 if (new_nthreads == 1) {
952 KC_TRACE(10,
953 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
954 "dead roots and rechecking; requested %d threads\n",
955 __kmp_get_gtid(), set_nthreads));
Jonathan Peyton642688b2017-06-01 16:46:36 +0000956 } else {
957 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
958 " %d threads\n",
959 __kmp_get_gtid(), new_nthreads, set_nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +0000960 }
Jonathan Peyton642688b2017-06-01 16:46:36 +0000961#endif // KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +0000962 return new_nthreads;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000963}
964
Jonathan Peyton30419822017-05-12 18:01:32 +0000965/* Allocate threads from the thread pool and assign them to the new team. We are
966 assured that there are enough threads available, because we checked on that
967 earlier within critical section forkjoin */
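/* When the team is the optimized hot team, the thread slots are left as they
   are; otherwise the master is installed in slot 0 of team->t.t_threads and
   workers for slots 1..nproc-1 are obtained from __kmp_allocate_thread(). */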
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }; // for b
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We try to avoid unnecessary writes to the relevant cache line in the
// team structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have. So, this code achieves what we need
    // whether or not t_fp_control_saved is true. By checking whether the value
    // needs updating we avoid unnecessary writes that would put the cache-line
    // into a written state, causing all threads in the team to have to read it
    // again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // during the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration

/* Run a parallel region that has been serialized, so it runs only in a team
   of the single master thread. */
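/* The serial "team" is a real kmp_team_t with t_serialized set, so the rest
   of the runtime can treat serialized regions uniformly. The thread's cached
   th_serial_team is reused when possible; a fresh team is allocated only when
   the cached one is already in use (nested serialized regions). */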
1134void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1135 kmp_info_t *this_thr;
1136 kmp_team_t *serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001137
Jonathan Peyton30419822017-05-12 18:01:32 +00001138 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001139
Jonathan Peyton30419822017-05-12 18:01:32 +00001140 /* Skip all this code for autopar serialized loops since it results in
1141 unacceptable overhead */
1142 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
1143 return;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001144
Jonathan Peyton30419822017-05-12 18:01:32 +00001145 if (!TCR_4(__kmp_init_parallel))
1146 __kmp_parallel_initialize();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001147
Jonathan Peyton30419822017-05-12 18:01:32 +00001148 this_thr = __kmp_threads[global_tid];
1149 serial_team = this_thr->th.th_serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001150
Jonathan Peyton30419822017-05-12 18:01:32 +00001151 /* utilize the serialized team held by this thread */
1152 KMP_DEBUG_ASSERT(serial_team);
1153 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001154
Jonathan Peyton30419822017-05-12 18:01:32 +00001155 if (__kmp_tasking_mode != tskm_immediate_exec) {
1156 KMP_DEBUG_ASSERT(
1157 this_thr->th.th_task_team ==
1158 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1159 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1160 NULL);
1161 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1162 "team %p, new task_team = NULL\n",
1163 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1164 this_thr->th.th_task_team = NULL;
1165 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001166
1167#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001168 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1169 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1170 proc_bind = proc_bind_false;
1171 } else if (proc_bind == proc_bind_default) {
1172 // No proc_bind clause was specified, so use the current value
1173 // of proc-bind-var for this parallel region.
1174 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1175 }
1176 // Reset for next parallel region
1177 this_thr->th.th_set_proc_bind = proc_bind_default;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001178#endif /* OMP_40_ENABLED */
1179
Jonathan Peyton30419822017-05-12 18:01:32 +00001180 if (this_thr->th.th_team != serial_team) {
1181 // Nested level will be an index in the nested nthreads array
1182 int level = this_thr->th.th_team->t.t_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001183
Jonathan Peyton30419822017-05-12 18:01:32 +00001184 if (serial_team->t.t_serialized) {
1185 /* this serial team was already used
 1186         TODO: increase performance by making these locks more specific */
1187 kmp_team_t *new_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001188
Jonathan Peyton30419822017-05-12 18:01:32 +00001189 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001190
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001191#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001192 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001193#endif
1194
Jonathan Peyton30419822017-05-12 18:01:32 +00001195 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001196#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001197 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001198#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001199#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001200 proc_bind,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001201#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001202 &this_thr->th.th_current_task->td_icvs,
1203 0 USE_NESTED_HOT_ARG(NULL));
1204 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1205 KMP_ASSERT(new_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001206
Jonathan Peyton30419822017-05-12 18:01:32 +00001207 /* setup new serialized team and install it */
1208 new_team->t.t_threads[0] = this_thr;
1209 new_team->t.t_parent = this_thr->th.th_team;
1210 serial_team = new_team;
1211 this_thr->th.th_serial_team = serial_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001212
Jonathan Peyton30419822017-05-12 18:01:32 +00001213 KF_TRACE(
1214 10,
1215 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1216 global_tid, serial_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001217
Jonathan Peyton30419822017-05-12 18:01:32 +00001218      /* TODO: the above breaks the guarantee that serialized teams still
 1219         work when we run out of resources, since we may need to
 1220         allocate a new team here */
1221 } else {
1222 KF_TRACE(
1223 10,
1224 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1225 global_tid, serial_team));
1226 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001227
Jonathan Peyton30419822017-05-12 18:01:32 +00001228 /* we have to initialize this serial team */
1229 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1230 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1231 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1232 serial_team->t.t_ident = loc;
1233 serial_team->t.t_serialized = 1;
1234 serial_team->t.t_nproc = 1;
1235 serial_team->t.t_parent = this_thr->th.th_team;
1236 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
1237 this_thr->th.th_team = serial_team;
1238 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001239
Jonathan Peyton30419822017-05-12 18:01:32 +00001240    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1241 this_thr->th.th_current_task));
1242 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1243 this_thr->th.th_current_task->td_flags.executing = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001244
Jonathan Peyton30419822017-05-12 18:01:32 +00001245 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001246
Jonathan Peyton30419822017-05-12 18:01:32 +00001247 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
1248 implicit task for each serialized task represented by
1249 team->t.t_serialized? */
1250 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1251 &this_thr->th.th_current_task->td_parent->td_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001252
Jonathan Peyton30419822017-05-12 18:01:32 +00001253 // Thread value exists in the nested nthreads array for the next nested
1254 // level
1255 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1256 this_thr->th.th_current_task->td_icvs.nproc =
1257 __kmp_nested_nth.nth[level + 1];
1258 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001259
1260#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001261 if (__kmp_nested_proc_bind.used &&
1262 (level + 1 < __kmp_nested_proc_bind.used)) {
1263 this_thr->th.th_current_task->td_icvs.proc_bind =
1264 __kmp_nested_proc_bind.bind_types[level + 1];
1265 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001266#endif /* OMP_40_ENABLED */
1267
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001268#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00001269 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001270#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001271 this_thr->th.th_info.ds.ds_tid = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001272
Jonathan Peyton30419822017-05-12 18:01:32 +00001273 /* set thread cache values */
1274 this_thr->th.th_team_nproc = 1;
1275 this_thr->th.th_team_master = this_thr;
1276 this_thr->th.th_team_serialized = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001277
Jonathan Peyton30419822017-05-12 18:01:32 +00001278 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1279 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001280
Jonathan Peyton30419822017-05-12 18:01:32 +00001281 propagateFPControl(serial_team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001282
Jonathan Peyton30419822017-05-12 18:01:32 +00001283 /* check if we need to allocate dispatch buffers stack */
1284 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1285 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1286 serial_team->t.t_dispatch->th_disp_buffer =
1287 (dispatch_private_info_t *)__kmp_allocate(
1288 sizeof(dispatch_private_info_t));
1289 }
1290 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001291
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001292#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001293 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
1294 __ompt_team_assign_id(serial_team, ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001295#endif
1296
Jonathan Peyton30419822017-05-12 18:01:32 +00001297 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001298
Jonathan Peyton30419822017-05-12 18:01:32 +00001299 } else {
1300 /* this serialized team is already being used,
1301 * that's fine, just add another nested level */
1302 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1303 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1304 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1305 ++serial_team->t.t_serialized;
1306 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001307
Jonathan Peyton30419822017-05-12 18:01:32 +00001308 // Nested level will be an index in the nested nthreads array
1309 int level = this_thr->th.th_team->t.t_level;
1310 // Thread value exists in the nested nthreads array for the next nested
1311 // level
1312 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1313 this_thr->th.th_current_task->td_icvs.nproc =
1314 __kmp_nested_nth.nth[level + 1];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001315 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001316 serial_team->t.t_level++;
1317 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1318 "of serial team %p to %d\n",
1319 global_tid, serial_team, serial_team->t.t_level));
1320
1321 /* allocate/push dispatch buffers stack */
1322 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1323 {
1324 dispatch_private_info_t *disp_buffer =
1325 (dispatch_private_info_t *)__kmp_allocate(
1326 sizeof(dispatch_private_info_t));
1327 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1328 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1329 }
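    // Descriptive note: th_disp_buffer is used as an intrusive singly-linked
    // stack. The block above pushes a freshly allocated node at the head (one
    // per nested serialized level); the matching end-of-serialized-parallel
    // path pops and frees it when the region exits.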
1330 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1331
1332 KMP_MB();
1333 }
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001334#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001335 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001336#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001337
Jonathan Peyton30419822017-05-12 18:01:32 +00001338 if (__kmp_env_consistency_check)
1339 __kmp_push_parallel(global_tid, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001340}
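// User-level view (illustrative): the serialized path above is what a
// program reaches when a parallel region is inactive, e.g. an if-clause that
// evaluates to false. The fork machinery still runs, but the team is just
// the encountering thread:
//
//   #include <omp.h>
//   #include <stdio.h>
//   int main(void) {
//     #pragma omp parallel if (0) // always inactive: serialized team of one
//     printf("threads=%d in_parallel=%d\n", omp_get_num_threads(),
//            omp_in_parallel()); // prints "threads=1 in_parallel=0"
//     return 0;
//   }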
Jim Cownie181b4bb2013-12-23 17:28:57 +00001341
Jim Cownie5e8470a2013-09-27 10:38:44 +00001342/* most of the work for a fork */
1343/* return true if we really went parallel, false if serialized */
Jonathan Peyton30419822017-05-12 18:01:32 +00001344int __kmp_fork_call(ident_t *loc, int gtid,
1345 enum fork_context_e call_context, // Intel, GNU, ...
1346 kmp_int32 argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001347#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001348 void *unwrapped_task,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001349#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001350 microtask_t microtask, launch_t invoker,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001351/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001352#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001353 va_list *ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001354#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001355 va_list ap
Jim Cownie5e8470a2013-09-27 10:38:44 +00001356#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001357 ) {
1358 void **argv;
1359 int i;
1360 int master_tid;
1361 int master_this_cons;
1362 kmp_team_t *team;
1363 kmp_team_t *parent_team;
1364 kmp_info_t *master_th;
1365 kmp_root_t *root;
1366 int nthreads;
1367 int master_active;
1368 int master_set_numthreads;
1369 int level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001370#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001371 int active_level;
1372 int teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001373#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001374#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001375 kmp_hot_team_ptr_t **p_hot_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001376#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001377 { // KMP_TIME_BLOCK
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001378 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001379 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001380
Jonathan Peyton30419822017-05-12 18:01:32 +00001381 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1382 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1383 /* Some systems prefer the stack for the root thread(s) to start with */
1384 /* some gap from the parent stack to prevent false sharing. */
1385 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1386 /* These 2 lines below are so this does not get optimized out */
1387 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1388 __kmp_stkpadding += (short)((kmp_int64)dummy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001389 }
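    // Note: the KMP_ALLOCA above only shifts the root thread's stack pointer
    // to leave a gap from the parent stack, so their hot frames do not share
    // cache lines; folding `dummy` into __kmp_stkpadding is dead arithmetic
    // whose sole purpose is keeping the compiler from eliding the alloca.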
Jim Cownie5e8470a2013-09-27 10:38:44 +00001390
1391 /* initialize if needed */
Jonathan Peyton30419822017-05-12 18:01:32 +00001392 KMP_DEBUG_ASSERT(
1393 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
1394 if (!TCR_4(__kmp_init_parallel))
1395 __kmp_parallel_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001396
1397 /* setup current data */
Jonathan Peyton30419822017-05-12 18:01:32 +00001398 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
1399 // shutdown
1400 parent_team = master_th->th.th_team;
1401 master_tid = master_th->th.th_info.ds.ds_tid;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001402 master_this_cons = master_th->th.th_local.this_construct;
Jonathan Peyton30419822017-05-12 18:01:32 +00001403 root = master_th->th.th_root;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001404 master_active = root->r.r_active;
1405 master_set_numthreads = master_th->th.th_set_nproc;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001406
1407#if OMPT_SUPPORT
1408 ompt_parallel_id_t ompt_parallel_id;
1409 ompt_task_id_t ompt_task_id;
1410 ompt_frame_t *ompt_frame;
1411 ompt_task_id_t my_task_id;
1412 ompt_parallel_id_t my_parallel_id;
1413
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001414 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001415 ompt_parallel_id = __ompt_parallel_id_new(gtid);
1416 ompt_task_id = __ompt_get_task_id_internal(0);
1417 ompt_frame = __ompt_get_task_frame_internal(0);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001418 }
1419#endif
1420
Jim Cownie5e8470a2013-09-27 10:38:44 +00001421 // Nested level will be an index in the nested nthreads array
Jonathan Peyton30419822017-05-12 18:01:32 +00001422 level = parent_team->t.t_level;
1423 // used to launch non-serial teams even if nested is not allowed
1424 active_level = parent_team->t.t_active_level;
Jonathan Peytonc76f9f02016-06-21 19:12:07 +00001425#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00001426 // needed to check nesting inside the teams
1427 teams_level = master_th->th.th_teams_level;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001428#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001429#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00001430 p_hot_teams = &master_th->th.th_hot_teams;
1431 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1432 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1433 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1434 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
Jonathan Peyton642688b2017-06-01 16:46:36 +00001435 // it is either actual or not needed (when active_level > 0)
1436 (*p_hot_teams)[0].hot_team_nth = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001437 }
1438#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001439
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001440#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001441 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001442 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001443 int team_size = master_set_numthreads;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001444
Jonathan Peyton30419822017-05-12 18:01:32 +00001445 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
1446 ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
1447 OMPT_INVOKER(call_context));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001448 }
1449#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001450
Jim Cownie5e8470a2013-09-27 10:38:44 +00001451 master_th->th.th_ident = loc;
1452
1453#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001454 if (master_th->th.th_teams_microtask && ap &&
1455 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1456 // AC: This is start of parallel that is nested inside teams construct.
1457 // The team is actual (hot), all workers are ready at the fork barrier.
1458 // No lock needed to initialize the team a bit, then free workers.
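      // User-level shape of this case, for orientation (illustrative):
      //
      //   #pragma omp teams num_teams(2)
      //   #pragma omp parallel // this fork: each team master forks within
      //   { ... }              // its already-created hot team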
1459 parent_team->t.t_ident = loc;
1460 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1461 parent_team->t.t_argc = argc;
1462 argv = (void **)parent_team->t.t_argv;
1463 for (i = argc - 1; i >= 0; --i)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001464/* TODO: revert workaround for Intel(R) 64 tracker #96 */
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001465#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001466 *argv++ = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001467#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001468 *argv++ = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001469#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001470      // Increment our nested depth level, but do not increase serialization
1471 if (parent_team == master_th->th.th_serial_team) {
1472 // AC: we are in serialized parallel
1473 __kmpc_serialized_parallel(loc, gtid);
1474 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1475 // AC: need this in order enquiry functions work
1476 // correctly, will restore at join time
1477 parent_team->t.t_serialized--;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001478#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001479 void *dummy;
1480 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001481
Jonathan Peyton30419822017-05-12 18:01:32 +00001482 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001483
Jonathan Peyton30419822017-05-12 18:01:32 +00001484 if (ompt_enabled) {
1485 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
1486 ompt_parallel_id);
1487 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1488 exit_runtime_p =
1489 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001490
Jonathan Peyton30419822017-05-12 18:01:32 +00001491 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001492
1493#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001494 /* OMPT implicit task begin */
1495 my_task_id = lw_taskteam.ompt_task_info.task_id;
1496 my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
1497 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1498 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1499 my_parallel_id, my_task_id);
1500 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001501#endif
1502
Jonathan Peyton30419822017-05-12 18:01:32 +00001503 /* OMPT state */
1504 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1505 } else {
1506 exit_runtime_p = &dummy;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001507 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001508#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001509
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001510 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001511 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1512 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1513 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1514#if OMPT_SUPPORT
1515 ,
1516 exit_runtime_p
1517#endif
1518 );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001519 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001520
Jonathan Peyton30419822017-05-12 18:01:32 +00001521#if OMPT_SUPPORT
1522 *exit_runtime_p = NULL;
1523 if (ompt_enabled) {
1524#if OMPT_TRACE
1525 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001526
Jonathan Peyton30419822017-05-12 18:01:32 +00001527 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1528 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1529 ompt_parallel_id, ompt_task_id);
1530 }
1531
1532 __ompt_lw_taskteam_unlink(master_th);
 1533          // clear the task id only after unlinking the task
1534 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
1535#endif
1536
1537 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1538 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1539 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1540 }
1541 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1542 }
1543#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001544 return TRUE;
Jonathan Peyton30419822017-05-12 18:01:32 +00001545 }
1546
1547 parent_team->t.t_pkfn = microtask;
1548#if OMPT_SUPPORT
1549 parent_team->t.ompt_team_info.microtask = unwrapped_task;
1550#endif
1551 parent_team->t.t_invoke = invoker;
1552 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
1553 parent_team->t.t_active_level++;
1554 parent_team->t.t_level++;
1555
1556 /* Change number of threads in the team if requested */
1557 if (master_set_numthreads) { // The parallel has num_threads clause
1558 if (master_set_numthreads < master_th->th.th_teams_size.nth) {
 1559          // AC: can only reduce the number of threads dynamically, cannot increase
1560 kmp_info_t **other_threads = parent_team->t.t_threads;
1561 parent_team->t.t_nproc = master_set_numthreads;
1562 for (i = 0; i < master_set_numthreads; ++i) {
1563 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1564 }
1565 // Keep extra threads hot in the team for possible next parallels
1566 }
1567 master_th->th.th_set_nproc = 0;
1568 }
1569
1570#if USE_DEBUGGER
1571 if (__kmp_debugging) { // Let debugger override number of threads.
1572 int nth = __kmp_omp_num_threads(loc);
Jonathan Peyton642688b2017-06-01 16:46:36 +00001573 if (nth > 0) { // 0 means debugger doesn't want to change num threads
Jonathan Peyton30419822017-05-12 18:01:32 +00001574 master_set_numthreads = nth;
 1575        } // if
 1576      } // if
1577#endif
1578
1579 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
1580 "master_th=%p, gtid=%d\n",
1581 root, parent_team, master_th, gtid));
1582 __kmp_internal_fork(loc, gtid, parent_team);
1583 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
1584 "master_th=%p, gtid=%d\n",
1585 root, parent_team, master_th, gtid));
1586
1587 /* Invoke microtask for MASTER thread */
1588 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
1589 parent_team->t.t_id, parent_team->t.t_pkfn));
1590
1591 {
1592 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1593 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1594 if (!parent_team->t.t_invoke(gtid)) {
1595 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
1596 }
1597 }
1598 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
1599 parent_team->t.t_id, parent_team->t.t_pkfn));
1600 KMP_MB(); /* Flush all pending memory write invalidates. */
1601
1602 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1603
1604 return TRUE;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001605 } // Parallel closely nested in teams construct
Jim Cownie5e8470a2013-09-27 10:38:44 +00001606#endif /* OMP_40_ENABLED */
1607
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001608#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001609 if (__kmp_tasking_mode != tskm_immediate_exec) {
1610 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1611 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001612 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001613#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001614
Jonathan Peyton30419822017-05-12 18:01:32 +00001615 if (parent_team->t.t_active_level >=
1616 master_th->th.th_current_task->td_icvs.max_active_levels) {
1617 nthreads = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001618 } else {
Andrey Churbanov92effc42015-08-18 10:08:27 +00001619#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001620 int enter_teams = ((ap == NULL && active_level == 0) ||
1621 (ap && teams_level > 0 && teams_level == level));
Andrey Churbanov92effc42015-08-18 10:08:27 +00001622#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001623 nthreads =
1624 master_set_numthreads
1625 ? master_set_numthreads
1626 : get__nproc_2(
1627 parent_team,
1628 master_tid); // TODO: get nproc directly from current task
Andrey Churbanov92effc42015-08-18 10:08:27 +00001629
Jonathan Peyton30419822017-05-12 18:01:32 +00001630      // Check if we need to take the forkjoin lock (no need for a serialized
 1631      // parallel outside of a teams construct). This code was moved here from
 1632      // __kmp_reserve_threads() to speed up nested serialized parallels.
1633 if (nthreads > 1) {
1634 if ((!get__nested(master_th) && (root->r.r_in_parallel
Andrey Churbanov92effc42015-08-18 10:08:27 +00001635#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001636 && !enter_teams
Andrey Churbanov92effc42015-08-18 10:08:27 +00001637#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001638 )) ||
1639 (__kmp_library == library_serial)) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001640 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
1641 " threads\n",
1642 gtid, nthreads));
Jonathan Peyton30419822017-05-12 18:01:32 +00001643 nthreads = 1;
Andrey Churbanov92effc42015-08-18 10:08:27 +00001644 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001645 }
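      // Restated for readability (pseudo-predicate, not runtime code): the
      // branch above serializes the region when nesting is disabled and we
      // are already inside a parallel region (unless this fork is entering a
      // teams construct), or when the library mode was forced serial:
      //
      //   serialize = (!nested && in_parallel && !enter_teams) ||
      //               (__kmp_library == library_serial);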
1646 if (nthreads > 1) {
1647 /* determine how many new threads we can use */
1648 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jonathan Peyton30419822017-05-12 18:01:32 +00001649 nthreads = __kmp_reserve_threads(
1650 root, parent_team, master_tid, nthreads
Jim Cownie5e8470a2013-09-27 10:38:44 +00001651#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001652 /* AC: If we execute teams from parallel region (on host), then
1653 teams should be created but each can only have 1 thread if
1654 nesting is disabled. If teams called from serial region, then
1655 teams and their threads should be created regardless of the
1656 nesting setting. */
1657 ,
1658 enter_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00001659#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001660 );
1661 if (nthreads == 1) {
1662 // Free lock for single thread execution here; for multi-thread
1663 // execution it will be freed later after team of threads created
1664 // and initialized
1665 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Andrey Churbanov92effc42015-08-18 10:08:27 +00001666 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001667 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001668 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001669 KMP_DEBUG_ASSERT(nthreads > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001670
Jonathan Peyton30419822017-05-12 18:01:32 +00001671 // If we temporarily changed the set number of threads then restore it now
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001672 master_th->th.th_set_nproc = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001673
Jim Cownie5e8470a2013-09-27 10:38:44 +00001674 /* create a serialized parallel region? */
Jonathan Peyton30419822017-05-12 18:01:32 +00001675 if (nthreads == 1) {
1676/* josh todo: hypothetical question: what do we do for OS X*? */
1677#if KMP_OS_LINUX && \
1678 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1679 void *args[argc];
Jim Cownie5e8470a2013-09-27 10:38:44 +00001680#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001681 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1682#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \
1683 KMP_ARCH_AARCH64) */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001684
Jonathan Peyton30419822017-05-12 18:01:32 +00001685 KA_TRACE(20,
1686 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001687
Jonathan Peyton30419822017-05-12 18:01:32 +00001688 __kmpc_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001689
Jonathan Peyton30419822017-05-12 18:01:32 +00001690 if (call_context == fork_context_intel) {
1691 /* TODO this sucks, use the compiler itself to pass args! :) */
1692 master_th->th.th_serial_team->t.t_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001693#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001694 if (!ap) {
1695 // revert change made in __kmpc_serialized_parallel()
1696 master_th->th.th_serial_team->t.t_level--;
1697// Get args from parent team for teams construct
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001698
1699#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001700 void *dummy;
1701 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001702
Jonathan Peyton30419822017-05-12 18:01:32 +00001703 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001704
Jonathan Peyton30419822017-05-12 18:01:32 +00001705 if (ompt_enabled) {
1706 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1707 unwrapped_task, ompt_parallel_id);
1708 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1709 exit_runtime_p =
1710 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001711
Jonathan Peyton30419822017-05-12 18:01:32 +00001712 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001713
1714#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001715 my_task_id = lw_taskteam.ompt_task_info.task_id;
1716 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1717 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1718 ompt_parallel_id, my_task_id);
1719 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001720#endif
1721
Jonathan Peyton30419822017-05-12 18:01:32 +00001722 /* OMPT state */
1723 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1724 } else {
1725 exit_runtime_p = &dummy;
1726 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001727#endif
1728
Jonathan Peyton30419822017-05-12 18:01:32 +00001729 {
1730 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1731 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1732 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1733 parent_team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001734#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001735 ,
1736 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001737#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001738 );
1739 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001740
1741#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001742 *exit_runtime_p = NULL;
1743 if (ompt_enabled) {
1744 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001745
1746#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001747 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1748 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1749 ompt_parallel_id, ompt_task_id);
1750 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001751#endif
1752
Jonathan Peyton30419822017-05-12 18:01:32 +00001753 __ompt_lw_taskteam_unlink(master_th);
 1754          // clear the task id only after unlinking the task
1755 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001756
Jonathan Peyton30419822017-05-12 18:01:32 +00001757 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1758 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1759 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1760 }
1761 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1762 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001763#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001764 } else if (microtask == (microtask_t)__kmp_teams_master) {
1765 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1766 master_th->th.th_serial_team);
1767 team = master_th->th.th_team;
1768 // team->t.t_pkfn = microtask;
1769 team->t.t_invoke = invoker;
1770 __kmp_alloc_argv_entries(argc, team, TRUE);
1771 team->t.t_argc = argc;
1772 argv = (void **)team->t.t_argv;
1773 if (ap) {
1774 for (i = argc - 1; i >= 0; --i)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001775// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001776#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00001777 *argv++ = va_arg(*ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001778#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001779 *argv++ = va_arg(ap, void *);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001780#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001781 } else {
1782 for (i = 0; i < argc; ++i)
1783 // Get args from parent team for teams construct
1784 argv[i] = parent_team->t.t_argv[i];
1785 }
1786 // AC: revert change made in __kmpc_serialized_parallel()
1787 // because initial code in teams should have level=0
1788 team->t.t_level--;
1789 // AC: call special invoker for outer "parallel" of teams construct
1790 {
1791 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1792 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1793 invoker(gtid);
1794 }
1795 } else {
1796#endif /* OMP_40_ENABLED */
1797 argv = args;
1798 for (i = argc - 1; i >= 0; --i)
1799// TODO: revert workaround for Intel(R) 64 tracker #96
1800#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
1801 *argv++ = va_arg(*ap, void *);
1802#else
1803 *argv++ = va_arg(ap, void *);
1804#endif
1805 KMP_MB();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001806
1807#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001808 void *dummy;
1809 void **exit_runtime_p;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001810
Jonathan Peyton30419822017-05-12 18:01:32 +00001811 ompt_lw_taskteam_t lw_taskteam;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001812
Jonathan Peyton30419822017-05-12 18:01:32 +00001813 if (ompt_enabled) {
1814 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1815 unwrapped_task, ompt_parallel_id);
1816 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
1817 exit_runtime_p =
1818 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001819
Jonathan Peyton30419822017-05-12 18:01:32 +00001820 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001821
1822#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001823 /* OMPT implicit task begin */
1824 my_task_id = lw_taskteam.ompt_task_info.task_id;
1825 my_parallel_id = ompt_parallel_id;
1826 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
1827 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
1828 my_parallel_id, my_task_id);
1829 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001830#endif
1831
Jonathan Peyton30419822017-05-12 18:01:32 +00001832 /* OMPT state */
1833 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1834 } else {
1835 exit_runtime_p = &dummy;
1836 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001837#endif
1838
Jonathan Peyton30419822017-05-12 18:01:32 +00001839 {
1840 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1841 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1842 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001843#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001844 ,
1845 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001846#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001847 );
1848 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001849
1850#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001851 *exit_runtime_p = NULL;
1852 if (ompt_enabled) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001853#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001854 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001855
Jonathan Peyton30419822017-05-12 18:01:32 +00001856 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
1857 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
1858 my_parallel_id, my_task_id);
1859 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001860#endif
1861
Jonathan Peyton30419822017-05-12 18:01:32 +00001862 __ompt_lw_taskteam_unlink(master_th);
 1863          // clear the task id only after unlinking the task
1864 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001865
Jonathan Peyton30419822017-05-12 18:01:32 +00001866 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
1867 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
1868 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
1869 }
1870 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1871 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001872#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001873#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001874 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001875#endif /* OMP_40_ENABLED */
1876 } else if (call_context == fork_context_gnu) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001877#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001878 ompt_lw_taskteam_t *lwt =
1879 (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t));
1880 __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task,
1881 ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001882
Jonathan Peyton30419822017-05-12 18:01:32 +00001883 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1884 lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
1885 __ompt_lw_taskteam_link(lwt, master_th);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001886#endif
1887
Jonathan Peyton30419822017-05-12 18:01:32 +00001888 // we were called from GNU native code
1889 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001890 return FALSE;
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001891 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001892 KMP_ASSERT2(call_context < fork_context_last,
1893 "__kmp_fork_call: unknown fork_context parameter");
1894 }
1895
1896 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
1897 KMP_MB();
1898 return FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001899 }
1900
Jim Cownie5e8470a2013-09-27 10:38:44 +00001901 // GEH: only modify the executing flag in the case when not serialized
1902 // serialized case is handled in kmpc_serialized_parallel
Jonathan Peyton30419822017-05-12 18:01:32 +00001903 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
1904 "curtask=%p, curtask_max_aclevel=%d\n",
1905 parent_team->t.t_active_level, master_th,
1906 master_th->th.th_current_task,
1907 master_th->th.th_current_task->td_icvs.max_active_levels));
1908 // TODO: GEH - cannot do this assertion because root thread not set up as
1909 // executing
Jim Cownie5e8470a2013-09-27 10:38:44 +00001910 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
1911 master_th->th.th_current_task->td_flags.executing = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001912
1913#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001914 if (!master_th->th.th_teams_microtask || level > teams_level)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001915#endif /* OMP_40_ENABLED */
1916 {
Jonathan Peyton30419822017-05-12 18:01:32 +00001917 /* Increment our nested depth level */
1918 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001919 }
1920
Jim Cownie5e8470a2013-09-27 10:38:44 +00001921 // See if we need to make a copy of the ICVs.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001922 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001923 if ((level + 1 < __kmp_nested_nth.used) &&
1924 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1925 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1926 } else {
1927 nthreads_icv = 0; // don't update
Jim Cownie5e8470a2013-09-27 10:38:44 +00001928 }
1929
1930#if OMP_40_ENABLED
Jim Cownie5e8470a2013-09-27 10:38:44 +00001931 // Figure out the proc_bind_policy for the new team.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001932 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
Jonathan Peyton30419822017-05-12 18:01:32 +00001933 kmp_proc_bind_t proc_bind_icv =
1934 proc_bind_default; // proc_bind_default means don't update
1935 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1936 proc_bind = proc_bind_false;
1937 } else {
1938 if (proc_bind == proc_bind_default) {
1939 // No proc_bind clause specified; use current proc-bind-var for this
1940 // parallel region
1941 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1942 }
1943 /* else: The proc_bind policy was specified explicitly on parallel clause.
1944 This overrides proc-bind-var for this parallel region, but does not
1945 change proc-bind-var. */
1946 // Figure the value of proc-bind-var for the child threads.
1947 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1948 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1949 master_th->th.th_current_task->td_icvs.proc_bind)) {
1950 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1951 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952 }
1953
Jim Cownie5e8470a2013-09-27 10:38:44 +00001954 // Reset for next parallel region
Jim Cownie5e8470a2013-09-27 10:38:44 +00001955 master_th->th.th_set_proc_bind = proc_bind_default;
1956#endif /* OMP_40_ENABLED */
1957
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001958 if ((nthreads_icv > 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001959#if OMP_40_ENABLED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001960 || (proc_bind_icv != proc_bind_default)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001961#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00001962 ) {
1963 kmp_internal_control_t new_icvs;
1964 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
1965 new_icvs.next = NULL;
1966 if (nthreads_icv > 0) {
1967 new_icvs.nproc = nthreads_icv;
1968 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001969
1970#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001971 if (proc_bind_icv != proc_bind_default) {
1972 new_icvs.proc_bind = proc_bind_icv;
1973 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001974#endif /* OMP_40_ENABLED */
1975
Jonathan Peyton30419822017-05-12 18:01:32 +00001976 /* allocate a new parallel team */
1977 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
1978 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001979#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001980 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001981#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001983 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001984#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001985 &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001986 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001987 /* allocate a new parallel team */
1988 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
1989 team = __kmp_allocate_team(root, nthreads, nthreads,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001990#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001991 ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001992#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001993#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001994 proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001996 &master_th->th.th_current_task->td_icvs,
1997 argc USE_NESTED_HOT_ARG(master_th));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001999 KF_TRACE(
2000 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002001
2002 /* setup the new team */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002003 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2004 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2005 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2006 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2007 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002008#if OMPT_SUPPORT
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002009 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002010#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002011 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
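  // The KMP_CHECK_UPDATE family assigns only when the value actually
  // changed, so reusing a hot team does not dirty shared cache lines
  // needlessly. Conceptually (sketch of the macro defined in kmp.h):
  //
  //   #define KMP_CHECK_UPDATE(a, b) if ((a) != (b)) (a) = (b)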
2012// TODO: parent_team->t.t_level == INT_MAX ???
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002014 if (!master_th->th.th_teams_microtask || level > teams_level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002016 int new_level = parent_team->t.t_level + 1;
2017 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2018 new_level = parent_team->t.t_active_level + 1;
2019 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002020#if OMP_40_ENABLED
2021 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002022 // AC: Do not increase parallel level at start of the teams construct
2023 int new_level = parent_team->t.t_level;
2024 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2025 new_level = parent_team->t.t_active_level;
2026 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002027 }
2028#endif /* OMP_40_ENABLED */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002029 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
Jonathan Peyton30419822017-05-12 18:01:32 +00002030 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
2031 team->t.t_sched.chunk != new_sched.chunk)
2032 team->t.t_sched =
2033 new_sched; // set master's schedule as new run-time schedule
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002035#if OMP_40_ENABLED
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002036 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
Jonathan Peyton45ca5da2015-10-19 19:33:38 +00002037#endif
2038
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002039 // Update the floating point rounding in the team if required.
2040 propagateFPControl(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002041
Jonathan Peyton30419822017-05-12 18:01:32 +00002042 if (__kmp_tasking_mode != tskm_immediate_exec) {
2043 // Set master's task team to team's task team. Unless this is hot team, it
2044 // should be NULL.
Jonathan Peyton30419822017-05-12 18:01:32 +00002045 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2046 parent_team->t.t_task_team[master_th->th.th_task_state]);
Jonathan Peyton30419822017-05-12 18:01:32 +00002047 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
2048 "%p, new task_team %p / team %p\n",
2049 __kmp_gtid_from_thread(master_th),
2050 master_th->th.th_task_team, parent_team,
2051 team->t.t_task_team[master_th->th.th_task_state], team));
Jonathan Peytond3f2b942016-02-09 22:32:41 +00002052
Jonathan Peyton30419822017-05-12 18:01:32 +00002053 if (active_level || master_th->th.th_task_team) {
2054 // Take a memo of master's task_state
2055 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2056 if (master_th->th.th_task_state_top >=
2057 master_th->th.th_task_state_stack_sz) { // increase size
2058 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2059 kmp_uint8 *old_stack, *new_stack;
2060 kmp_uint32 i;
2061 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2062 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2063 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2064 }
2065 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2066 ++i) { // zero-init rest of stack
2067 new_stack[i] = 0;
2068 }
2069 old_stack = master_th->th.th_task_state_memo_stack;
2070 master_th->th.th_task_state_memo_stack = new_stack;
2071 master_th->th.th_task_state_stack_sz = new_size;
2072 __kmp_free(old_stack);
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002073 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002074 // Store master's task_state on stack
2075 master_th->th
2076 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2077 master_th->th.th_task_state;
2078 master_th->th.th_task_state_top++;
2079#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton642688b2017-06-01 16:46:36 +00002080 if (team == master_th->th.th_hot_teams[active_level].hot_team) {
2081 // Restore master's nested state if nested hot team
Jonathan Peyton30419822017-05-12 18:01:32 +00002082 master_th->th.th_task_state =
2083 master_th->th
2084 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2085 } else {
2086#endif
2087 master_th->th.th_task_state = 0;
2088#if KMP_NESTED_HOT_TEAMS
2089 }
2090#endif
2091 }
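    // In effect th_task_state_memo_stack is a hand-grown stack of kmp_uint8:
    // the code above pushes the master's task_state (memo_stack[top++])
    // before entering the new region, and the join path pops it; the size
    // doubling keeps pushes amortized O(1) for deeply nested parallels.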
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002092#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002093 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2094 (team == root->r.r_hot_team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002095#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002096 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002097
Jonathan Peyton30419822017-05-12 18:01:32 +00002098 KA_TRACE(
2099 20,
2100 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2101 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2102 team->t.t_nproc));
2103 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2104 (team->t.t_master_tid == 0 &&
2105 (team->t.t_parent == root->r.r_root_team ||
2106 team->t.t_parent->t.t_serialized)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002107 KMP_MB();
2108
2109 /* now, setup the arguments */
Jonathan Peyton30419822017-05-12 18:01:32 +00002110 argv = (void **)team->t.t_argv;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002111#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002112 if (ap) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002113#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002114 for (i = argc - 1; i >= 0; --i) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002115// TODO: revert workaround for Intel(R) 64 tracker #96
Andrey Churbanovcbda8682015-01-13 14:43:35 +00002116#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
Jonathan Peyton30419822017-05-12 18:01:32 +00002117 void *new_argv = va_arg(*ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002118#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002119 void *new_argv = va_arg(ap, void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002121 KMP_CHECK_UPDATE(*argv, new_argv);
2122 argv++;
2123 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002124#if OMP_40_ENABLED
2125 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002126 for (i = 0; i < argc; ++i) {
2127 // Get args from parent team for teams construct
2128 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2129 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002130 }
2131#endif /* OMP_40_ENABLED */
2132
2133 /* now actually fork the threads */
Jonathan Peytonb044e4f2016-05-23 18:01:19 +00002134 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002135 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
Jonathan Peyton30419822017-05-12 18:01:32 +00002136 root->r.r_active = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137
Jonathan Peyton30419822017-05-12 18:01:32 +00002138 __kmp_fork_team_threads(root, team, master_th, gtid);
2139 __kmp_setup_icv_copy(team, nthreads,
2140 &master_th->th.th_current_task->td_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002142#if OMPT_SUPPORT
2143 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2144#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002145
Jonathan Peyton30419822017-05-12 18:01:32 +00002146 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002147
Jim Cownie5e8470a2013-09-27 10:38:44 +00002148#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002149 if (team->t.t_active_level == 1 // only report frames at level 1
2150#if OMP_40_ENABLED
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002151 && !master_th->th.th_teams_microtask // not in teams construct
Jonathan Peyton30419822017-05-12 18:01:32 +00002152#endif /* OMP_40_ENABLED */
2153 ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002154#if USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00002155 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2156 (__kmp_forkjoin_frames_mode == 3 ||
2157 __kmp_forkjoin_frames_mode == 1)) {
2158 kmp_uint64 tmp_time = 0;
2159 if (__itt_get_timestamp_ptr)
2160 tmp_time = __itt_get_timestamp();
2161 // Internal fork - report frame begin
2162 master_th->th.th_frame_time = tmp_time;
2163 if (__kmp_forkjoin_frames_mode == 3)
2164 team->t.t_region_time = tmp_time;
Jonathan Peyton642688b2017-06-01 16:46:36 +00002165 } else
2166// only one notification scheme (either "submit" or "forking/joined", not both)
Andrey Churbanov51aecb82015-05-06 19:22:36 +00002167#endif /* USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00002168 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2169 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2170 // Mark start of "parallel" region for VTune.
2171 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2172 }
Andrey Churbanovf6451d92015-01-16 15:58:03 +00002173 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002174#endif /* USE_ITT_BUILD */
2175
2176 /* now go on and do the work */
Jonathan Peyton30419822017-05-12 18:01:32 +00002177 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002178 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00002179 KF_TRACE(10,
2180 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2181 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002182
2183#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00002184 if (__itt_stack_caller_create_ptr) {
2185 team->t.t_stack_id =
2186 __kmp_itt_stack_caller_create(); // create new stack stitching id
2187 // before entering fork barrier
Jim Cownie5e8470a2013-09-27 10:38:44 +00002188 }
2189#endif /* USE_ITT_BUILD */
2190
2191#if OMP_40_ENABLED
Jonathan Peyton642688b2017-06-01 16:46:36 +00002192 // AC: skip __kmp_internal_fork at teams construct, let only master
2193 // threads execute
2194 if (ap)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002195#endif /* OMP_40_ENABLED */
2196 {
Jonathan Peyton30419822017-05-12 18:01:32 +00002197 __kmp_internal_fork(loc, gtid, team);
2198 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2199 "master_th=%p, gtid=%d\n",
2200 root, team, master_th, gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002201 }
2202
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002203 if (call_context == fork_context_gnu) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002204 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2205 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002206 }
2207
2208 /* Invoke microtask for MASTER thread */
Jonathan Peyton30419822017-05-12 18:01:32 +00002209 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2210 team->t.t_id, team->t.t_pkfn));
2211 } // END of timer KMP_fork_call block
Jim Cownie5e8470a2013-09-27 10:38:44 +00002212
Jonathan Peyton30419822017-05-12 18:01:32 +00002213 {
2214 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
2215 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
2216 if (!team->t.t_invoke(gtid)) {
2217 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
Jim Cownie5e8470a2013-09-27 10:38:44 +00002218 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002219 }
2220 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2221 team->t.t_id, team->t.t_pkfn));
2222 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00002223
Jonathan Peyton30419822017-05-12 18:01:32 +00002224 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002225
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002226#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002227 if (ompt_enabled) {
2228 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2229 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002230#endif
2231
Jonathan Peyton30419822017-05-12 18:01:32 +00002232 return TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002233}
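// Orientation (sketch, not a supported user API): compilers reach
// __kmp_fork_call() through the __kmpc_fork_call() entry point in
// kmp_csupport.cpp. Hand-written, "#pragma omp parallel" lowers roughly to:
//
//   void __outlined(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
//     (void)gtid; (void)btid;
//     (*x)++; // region body, run by every team thread (unsynchronized demo)
//   }
//   ...
//   int x = 0;
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)__outlined, &x);
//
// The varargs after the microtask are the shared-variable addresses that the
// va_arg loops above copy into team->t.t_argv.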
2234
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002235#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002236static inline void __kmp_join_restore_state(kmp_info_t *thread,
2237 kmp_team_t *team) {
2238 // restore state outside the region
2239 thread->th.ompt_thread_info.state =
2240 ((team->t.t_serialized) ? ompt_state_work_serial
2241 : ompt_state_work_parallel);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002242}
2243
Jonathan Peyton30419822017-05-12 18:01:32 +00002244static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team,
2245 ompt_parallel_id_t parallel_id,
2246 fork_context_e fork_context) {
2247 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2248 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
2249 ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
2250 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));
2251 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002252
Jonathan Peyton30419822017-05-12 18:01:32 +00002253 task_info->frame.reenter_runtime_frame = NULL;
2254 __kmp_join_restore_state(thread, team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002255}
2256#endif
2257
Jonathan Peyton30419822017-05-12 18:01:32 +00002258void __kmp_join_call(ident_t *loc, int gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002259#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002260 ,
2261 enum fork_context_e fork_context
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00002262#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002263#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002264 ,
2265 int exit_teams
Jim Cownie5e8470a2013-09-27 10:38:44 +00002266#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002267 ) {
2268 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2269 kmp_team_t *team;
2270 kmp_team_t *parent_team;
2271 kmp_info_t *master_th;
2272 kmp_root_t *root;
2273 int master_active;
2274 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002275
Jonathan Peyton30419822017-05-12 18:01:32 +00002276 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002277
Jonathan Peyton30419822017-05-12 18:01:32 +00002278 /* setup current data */
2279 master_th = __kmp_threads[gtid];
2280 root = master_th->th.th_root;
2281 team = master_th->th.th_team;
2282 parent_team = team->t.t_parent;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002283
Jonathan Peyton30419822017-05-12 18:01:32 +00002284 master_th->th.th_ident = loc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002285
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002286#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00002287 if (ompt_enabled) {
2288 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2289 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002290#endif
2291
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002292#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00002293 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2294 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2295 "th_task_team = %p\n",
2296 __kmp_gtid_from_thread(master_th), team,
2297 team->t.t_task_team[master_th->th.th_task_state],
2298 master_th->th.th_task_team));
2299 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2300 team->t.t_task_team[master_th->th.th_task_state]);
2301 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002302#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002303
Jonathan Peyton30419822017-05-12 18:01:32 +00002304 if (team->t.t_serialized) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002305#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002306 if (master_th->th.th_teams_microtask) {
2307 // We are in teams construct
2308 int level = team->t.t_level;
2309 int tlevel = master_th->th.th_teams_level;
2310 if (level == tlevel) {
2311 // AC: we haven't incremented it earlier at the start of the teams
2312 // construct, so do it here, at the end of the teams construct
2313 team->t.t_level++;
2314 } else if (level == tlevel + 1) {
2315 // AC: we are exiting parallel inside teams, need to increment
2316 // serialization in order to restore it in the next call to
2317 // __kmpc_end_serialized_parallel
2318 team->t.t_serialized++;
2319 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00002320 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00002321#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00002322 __kmpc_end_serialized_parallel(loc, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002323
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002324#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00002325 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002326 __kmp_join_restore_state(master_th, parent_team);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00002327 }
2328#endif
2329
Jonathan Peyton30419822017-05-12 18:01:32 +00002330 return;
2331 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002332
Jonathan Peyton30419822017-05-12 18:01:32 +00002333 master_active = team->t.t_master_active;
2334
2335#if OMP_40_ENABLED
2336 if (!exit_teams)
2337#endif /* OMP_40_ENABLED */
2338 {
2339 // AC: No barrier for internal teams at exit from the teams construct,
2340 // but there is a barrier for the external team (the league).
2341 __kmp_internal_join(loc, gtid, team);
2342 }
2343#if OMP_40_ENABLED
2344 else {
2345 master_th->th.th_task_state =
2346 0; // AC: no tasking in teams (out of any parallel)
2347 }
2348#endif /* OMP_40_ENABLED */
2349
2350 KMP_MB();
2351
2352#if OMPT_SUPPORT
2353 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
2354#endif
2355
2356#if USE_ITT_BUILD
2357 if (__itt_stack_caller_create_ptr) {
2358 __kmp_itt_stack_caller_destroy(
2359 (__itt_caller)team->t
2360 .t_stack_id); // destroy the stack stitching id after join barrier
2361 }
2362
2363 // Mark end of "parallel" region for VTune.
2364 if (team->t.t_active_level == 1
2365#if OMP_40_ENABLED
2366 && !master_th->th.th_teams_microtask /* not in teams construct */
2367#endif /* OMP_40_ENABLED */
2368 ) {
2369 master_th->th.th_ident = loc;
2370 // only one notification scheme (either "submit" or "forking/joined", not
2371 // both)
2372 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2373 __kmp_forkjoin_frames_mode == 3)
2374 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2375 master_th->th.th_frame_time, 0, loc,
2376 master_th->th.th_team_nproc, 1);
2377 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2378 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2379 __kmp_itt_region_joined(gtid);
2380 } // active_level == 1
2381#endif /* USE_ITT_BUILD */
2382
2383#if OMP_40_ENABLED
2384 if (master_th->th.th_teams_microtask && !exit_teams &&
2385 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2386 team->t.t_level == master_th->th.th_teams_level + 1) {
2387 // AC: We need to leave the team structure intact at the end of a parallel
2388 // inside the teams construct, so that the same (hot) team is reused at the
2389 // next parallel; only adjust the nesting levels
2390
2391 /* Decrement our nested depth level */
2392 team->t.t_level--;
2393 team->t.t_active_level--;
2394 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2395
2396 /* Restore number of threads in the team if needed */
2397 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2398 int old_num = master_th->th.th_team_nproc;
2399 int new_num = master_th->th.th_teams_size.nth;
2400 kmp_info_t **other_threads = team->t.t_threads;
2401 team->t.t_nproc = new_num;
2402 for (i = 0; i < old_num; ++i) {
2403 other_threads[i]->th.th_team_nproc = new_num;
2404 }
2405 // Adjust the states of the unused threads of the team
2406 for (i = old_num; i < new_num; ++i) {
2407 // Re-initialize thread's barrier data.
2408 int b;
2409 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2410 for (b = 0; b < bs_last_barrier; ++b) {
2411 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2412 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2413#if USE_DEBUGGER
2414 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2415#endif
2416 }
2417 if (__kmp_tasking_mode != tskm_immediate_exec) {
2418 // Synchronize thread's task state
2419 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2420 }
2421 }
2422 }
2423
2424#if OMPT_SUPPORT
2425 if (ompt_enabled) {
2426 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
2427 }
2428#endif
2429
2430 return;
2431 }
2432#endif /* OMP_40_ENABLED */
2433
2434 /* do cleanup and restore the parent team */
2435 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2436 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2437
2438 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2439
2440 /* jc: The following lock has instructions with REL and ACQ semantics,
2441 separating the parallel user code called in this parallel region
2442 from the serial user code called after this function returns. */
2443 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2444
2445#if OMP_40_ENABLED
2446 if (!master_th->th.th_teams_microtask ||
2447 team->t.t_level > master_th->th.th_teams_level)
2448#endif /* OMP_40_ENABLED */
2449 {
2450 /* Decrement our nested depth level */
2451 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
2452 }
2453 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2454
2455#if OMPT_SUPPORT && OMPT_TRACE
2456 if (ompt_enabled) {
2457 ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
2458 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
2459 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
2460 parallel_id, task_info->task_id);
2461 }
2462 task_info->frame.exit_runtime_frame = NULL;
2463 task_info->task_id = 0;
2464 }
2465#endif
2466
2467 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2468 master_th, team));
2469 __kmp_pop_current_task_from_thread(master_th);
2470
2471#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2472 // Restore master thread's partition.
2473 master_th->th.th_first_place = team->t.t_first_place;
2474 master_th->th.th_last_place = team->t.t_last_place;
2475#endif /* OMP_40_ENABLED */
2476
2477 updateHWFPControl(team);
2478
2479 if (root->r.r_active != master_active)
2480 root->r.r_active = master_active;
2481
2482 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2483 master_th)); // this will free worker threads
2484
2485 /* This race was fun to find. Make sure the following stays inside the
2486 critical region; otherwise assertions may fail occasionally, since the old
2487 team may be reallocated and the hierarchy then appears inconsistent. Running
2488 outside the region is actually safe and causes no bugs, only those assertion
2489 failures. It's a single deref & assign, so keep it in the critical region. */
2490 master_th->th.th_team = parent_team;
2491 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2492 master_th->th.th_team_master = parent_team->t.t_threads[0];
2493 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2494
2495 /* restore serialized team, if need be */
2496 if (parent_team->t.t_serialized &&
2497 parent_team != master_th->th.th_serial_team &&
2498 parent_team != root->r.r_root_team) {
2499 __kmp_free_team(root,
2500 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2501 master_th->th.th_serial_team = parent_team;
2502 }
2503
2504 if (__kmp_tasking_mode != tskm_immediate_exec) {
2505 if (master_th->th.th_task_state_top >
2506 0) { // Restore task state from memo stack
2507 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2508 // Remember master's state if we re-use this nested hot team
2509 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2510 master_th->th.th_task_state;
2511 --master_th->th.th_task_state_top; // pop
2512 // Now restore state at this level
2513 master_th->th.th_task_state =
2514 master_th->th
2515 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2516 }
2517 // Copy the task team from the parent team to the master thread
2518 master_th->th.th_task_team =
2519 parent_team->t.t_task_team[master_th->th.th_task_state];
2520 KA_TRACE(20,
2521 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2522 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2523 parent_team));
2524 }
2525
2526 // TODO: GEH - cannot do this assertion because root thread not set up as
2527 // executing
2528 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
2529 master_th->th.th_current_task->td_flags.executing = 1;
2530
2531 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2532
2533#if OMPT_SUPPORT
2534 if (ompt_enabled) {
2535 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
2536 }
2537#endif
2538
2539 KMP_MB();
2540 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2541}
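// Illustrative sketch (not runtime code) of the task-state memo stack restore
// performed above; the indices and values are hypothetical. Suppose the
// master entered a nested hot team and th_task_state_top == 1 on join:
//
//   memo_stack[1] = th_task_state;   // remember the nested state for reuse
//   --th_task_state_top;             // pop: top becomes 0
//   th_task_state = memo_stack[0];   // resume the parent's task state
//
// th_task_team is then re-read from the parent team, indexed by the restored
// th_task_state, matching the KA_TRACE message above.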
Jim Cownie5e8470a2013-09-27 10:38:44 +00002542
2543/* Check whether we should push an internal control record onto the
2544 serial team stack. If so, do it. */
Jonathan Peyton30419822017-05-12 18:01:32 +00002545void __kmp_save_internal_controls(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002546
Jonathan Peyton30419822017-05-12 18:01:32 +00002547 if (thread->th.th_team != thread->th.th_serial_team) {
2548 return;
2549 }
2550 if (thread->th.th_team->t.t_serialized > 1) {
2551 int push = 0;
2552
2553 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2554 push = 1;
2555 } else {
2556 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2557 thread->th.th_team->t.t_serialized) {
2558 push = 1;
2559 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002560 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002561 if (push) { /* push a record on the serial team's stack */
2562 kmp_internal_control_t *control =
2563 (kmp_internal_control_t *)__kmp_allocate(
2564 sizeof(kmp_internal_control_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002565
Jonathan Peyton30419822017-05-12 18:01:32 +00002566 copy_icvs(control, &thread->th.th_current_task->td_icvs);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002567
Jonathan Peyton30419822017-05-12 18:01:32 +00002568 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002569
Jonathan Peyton30419822017-05-12 18:01:32 +00002570 control->next = thread->th.th_team->t.t_control_stack_top;
2571 thread->th.th_team->t.t_control_stack_top = control;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002572 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002573 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002574}
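// Illustrative sketch (hypothetical user code): the routine above pushes only
// when the thread runs on its serial team at a serialized nesting depth
// greater than one, and at most once per depth:
//
//   #pragma omp parallel if(0)        // t_serialized == 1: no push yet
//   {
//     #pragma omp parallel if(0)      // t_serialized == 2
//     {
//       omp_set_num_threads(4);       // pushes {serial_nesting_level: 2,
//                                     //  current ICVs} before the change
//       omp_set_max_active_levels(3); // same depth already on top: no push
//     }                               // the record is popped and the saved
//   }                                 // ICVs restored when the region ends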
2575
2576/* Changes set_nproc */
Jonathan Peyton30419822017-05-12 18:01:32 +00002577void __kmp_set_num_threads(int new_nth, int gtid) {
2578 kmp_info_t *thread;
2579 kmp_root_t *root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002580
Jonathan Peyton30419822017-05-12 18:01:32 +00002581 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2582 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002583
Jonathan Peyton30419822017-05-12 18:01:32 +00002584 if (new_nth < 1)
2585 new_nth = 1;
2586 else if (new_nth > __kmp_max_nth)
2587 new_nth = __kmp_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002588
Jonathan Peyton30419822017-05-12 18:01:32 +00002589 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
2590 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002591
Jonathan Peyton30419822017-05-12 18:01:32 +00002592 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002593
Jonathan Peyton30419822017-05-12 18:01:32 +00002594 set__nproc(thread, new_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002595
Jonathan Peyton30419822017-05-12 18:01:32 +00002596 // If this omp_set_num_threads() call will cause the hot team size to be
2597 // reduced (in the absence of a num_threads clause), then reduce it now,
2598 // rather than waiting for the next parallel region.
2599 root = thread->th.th_root;
2600 if (__kmp_init_parallel && (!root->r.r_active) &&
2601 (root->r.r_hot_team->t.t_nproc > new_nth)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002602#if KMP_NESTED_HOT_TEAMS
2603 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2604#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002605 ) {
2606 kmp_team_t *hot_team = root->r.r_hot_team;
2607 int f;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002608
Jonathan Peyton30419822017-05-12 18:01:32 +00002609 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002610
Jonathan Peyton30419822017-05-12 18:01:32 +00002611 // Release the extra threads we don't need any more.
2612 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2613 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2614 if (__kmp_tasking_mode != tskm_immediate_exec) {
2615 // When decreasing team size, threads no longer in the team should unref
2616 // task team.
2617 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2618 }
2619 __kmp_free_thread(hot_team->t.t_threads[f]);
2620 hot_team->t.t_threads[f] = NULL;
2621 }
2622 hot_team->t.t_nproc = new_nth;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002623#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00002624 if (thread->th.th_hot_teams) {
2625 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2626 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2627 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002628#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00002629
Jonathan Peyton30419822017-05-12 18:01:32 +00002630 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002631
Jonathan Peyton30419822017-05-12 18:01:32 +00002632 // Update the t_nproc field in the threads that are still active.
2633 for (f = 0; f < new_nth; f++) {
2634 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2635 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002636 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002637 // Special flag in case omp_set_num_threads() call
2638 hot_team->t.t_size_changed = -1;
2639 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002640}
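// Illustrative user-level sketch (hypothetical program) of the eager shrink
// implemented above: lowering the thread count trims the hot team right away
// instead of waiting for the next parallel region:
//
//   omp_set_num_threads(8);
//   #pragma omp parallel        // hot team grows to 8 threads
//   { /* ... */ }
//   omp_set_num_threads(2);     // workers 2..7 are freed here and
//                               //  t_size_changed is set to -1
//   #pragma omp parallel        // reuses the already-shrunken hot team
//   { /* ... */ }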
2641
Jim Cownie5e8470a2013-09-27 10:38:44 +00002642/* Changes max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002643void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2644 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002645
Jonathan Peyton30419822017-05-12 18:01:32 +00002646 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2647 "%d = (%d)\n",
2648 gtid, max_active_levels));
2649 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002650
Jonathan Peyton30419822017-05-12 18:01:32 +00002651 // validate max_active_levels
2652 if (max_active_levels < 0) {
2653 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2654 // We ignore this call if the user has specified a negative value.
2655 // The current setting won't be changed. The last valid setting will be
2656 // used. A warning will be issued (if warnings are allowed as controlled by
2657 // the KMP_WARNINGS env var).
2658 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2659 "max_active_levels for thread %d = (%d)\n",
2660 gtid, max_active_levels));
2661 return;
2662 }
2663 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2664 // it's OK, the max_active_levels is within the valid range: [ 0;
2665 // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
2666 // We allow a zero value. (implementation defined behavior)
2667 } else {
2668 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2669 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2670 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2671 // Current upper limit is MAX_INT. (implementation defined behavior)
2672 // If the input exceeds the upper limit, we correct the input to be the
2673 // upper limit. (implementation defined behavior)
2674 // In practice, the flow should never get here while the limit is MAX_INT.
2675 }
2676 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2677 "max_active_levels for thread %d = (%d)\n",
2678 gtid, max_active_levels));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002679
Jonathan Peyton30419822017-05-12 18:01:32 +00002680 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002681
Jonathan Peyton30419822017-05-12 18:01:32 +00002682 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002683
Jonathan Peyton30419822017-05-12 18:01:32 +00002684 set__max_active_levels(thread, max_active_levels);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002685}
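// Illustrative sketch of the validation above (argument values hypothetical):
//   __kmp_set_max_active_levels(gtid, -3); // ignored: warning issued, the
//                                          //  last valid setting is kept
//   __kmp_set_max_active_levels(gtid, 0);  // accepted: zero is allowed
//                                          //  (implementation defined)
// A value above KMP_MAX_ACTIVE_LEVELS_LIMIT is clipped to that limit, with a
// warning, before being stored via set__max_active_levels().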
2686
2687/* Gets max_active_levels */
Jonathan Peyton30419822017-05-12 18:01:32 +00002688int __kmp_get_max_active_levels(int gtid) {
2689 kmp_info_t *thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002690
Jonathan Peyton30419822017-05-12 18:01:32 +00002691 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2692 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002693
Jonathan Peyton30419822017-05-12 18:01:32 +00002694 thread = __kmp_threads[gtid];
2695 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2696 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2697 "curtask_maxaclevel=%d\n",
2698 gtid, thread->th.th_current_task,
2699 thread->th.th_current_task->td_icvs.max_active_levels));
2700 return thread->th.th_current_task->td_icvs.max_active_levels;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002701}
2702
2703/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
Jonathan Peyton30419822017-05-12 18:01:32 +00002704void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2705 kmp_info_t *thread;
2706 // kmp_team_t *team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002707
Jonathan Peyton30419822017-05-12 18:01:32 +00002708 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2709 gtid, (int)kind, chunk));
2710 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002711
Jonathan Peyton30419822017-05-12 18:01:32 +00002712 // Check if the kind parameter is valid, correct if needed.
2713 // Valid parameters should fit in one of two intervals - standard or extended:
2714 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
2715 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
2716 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2717 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2718 // TODO: Hint needs attention in case we change the default schedule.
2719 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2720 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2721 __kmp_msg_null);
2722 kind = kmp_sched_default;
2723 chunk = 0; // ignore chunk value in case of bad kind
2724 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002725
Jonathan Peyton30419822017-05-12 18:01:32 +00002726 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002727
Jonathan Peyton30419822017-05-12 18:01:32 +00002728 __kmp_save_internal_controls(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729
Jonathan Peyton30419822017-05-12 18:01:32 +00002730 if (kind < kmp_sched_upper_std) {
2731 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2732 // distinguish static chunked vs. unchunked: an invalid chunk value
2733 // indicates the unchunked schedule (which is the default)
2734 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002735 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002736 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2737 __kmp_sch_map[kind - kmp_sched_lower - 1];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002738 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002739 } else {
2740 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2741 // kmp_sched_lower - 2 ];
2742 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2743 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2744 kmp_sched_lower - 2];
2745 }
Andrey Churbanovd454c732017-06-05 17:17:33 +00002746 if (kind == kmp_sched_auto || chunk < 1) {
Jonathan Peyton30419822017-05-12 18:01:32 +00002747 // ignore parameter chunk for schedule auto
2748 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2749 } else {
2750 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2751 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002752}
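// Worked sketch of the interval check above, following the layout in the
// comment ("0, 1 - 4, 5, 100, 101 - 102, 103"); the calls are hypothetical:
//   kind == 2   (kmp_sched_dynamic)     -> standard interval, accepted
//   kind == 101 (kmp_sched_trapezoidal) -> extended interval, accepted
//   kind == 5 or kind == 100            -> between the intervals: warning,
//                                          kind reset to kmp_sched_default
//                                          and chunk forced to 0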
2753
2754/* Gets def_sched_var ICV values */
Jonathan Peyton30419822017-05-12 18:01:32 +00002755void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2756 kmp_info_t *thread;
2757 enum sched_type th_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002758
Jonathan Peyton30419822017-05-12 18:01:32 +00002759 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2760 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002761
Jonathan Peyton30419822017-05-12 18:01:32 +00002762 thread = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00002763
Jonathan Peyton30419822017-05-12 18:01:32 +00002764 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002765
Jonathan Peyton30419822017-05-12 18:01:32 +00002766 switch (th_type) {
2767 case kmp_sch_static:
2768 case kmp_sch_static_greedy:
2769 case kmp_sch_static_balanced:
2770 *kind = kmp_sched_static;
2771 *chunk = 0; // chunk was not set; signal this fact with a zero value
2772 return;
2773 case kmp_sch_static_chunked:
2774 *kind = kmp_sched_static;
2775 break;
2776 case kmp_sch_dynamic_chunked:
2777 *kind = kmp_sched_dynamic;
2778 break;
2779 case kmp_sch_guided_chunked:
2780 case kmp_sch_guided_iterative_chunked:
2781 case kmp_sch_guided_analytical_chunked:
2782 *kind = kmp_sched_guided;
2783 break;
2784 case kmp_sch_auto:
2785 *kind = kmp_sched_auto;
2786 break;
2787 case kmp_sch_trapezoidal:
2788 *kind = kmp_sched_trapezoidal;
2789 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002790#if KMP_STATIC_STEAL_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002791 case kmp_sch_static_steal:
2792 *kind = kmp_sched_static_steal;
2793 break;
Jonathan Peytona1234cf2016-10-07 18:01:35 +00002794#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00002795 default:
2796 KMP_FATAL(UnknownSchedulingType, th_type);
2797 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002798
Jonathan Peyton30419822017-05-12 18:01:32 +00002799 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002800}
2801
Jonathan Peyton30419822017-05-12 18:01:32 +00002802int __kmp_get_ancestor_thread_num(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002803
Jonathan Peyton30419822017-05-12 18:01:32 +00002804 int ii, dd;
2805 kmp_team_t *team;
2806 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002807
Jonathan Peyton30419822017-05-12 18:01:32 +00002808 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2809 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002810
Jonathan Peyton30419822017-05-12 18:01:32 +00002811 // validate level
2812 if (level == 0)
2813 return 0;
2814 if (level < 0)
2815 return -1;
2816 thr = __kmp_threads[gtid];
2817 team = thr->th.th_team;
2818 ii = team->t.t_level;
2819 if (level > ii)
2820 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002821
2822#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002823 if (thr->th.th_teams_microtask) {
2824 // AC: we are in a teams region where multiple nested teams have the same level
2825 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2826 if (level <=
2827 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2828 KMP_DEBUG_ASSERT(ii >= tlevel);
2829 // AC: As we need to pass through the teams league, we need to
2830 // artificially increase ii
2831 if (ii == tlevel) {
2832 ii += 2; // three teams have same level
2833 } else {
2834 ii++; // two teams have same level
2835 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002836 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002837 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002838#endif
2839
Jonathan Peyton30419822017-05-12 18:01:32 +00002840 if (ii == level)
2841 return __kmp_tid_from_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002842
Jonathan Peyton30419822017-05-12 18:01:32 +00002843 dd = team->t.t_serialized;
2844 level++;
2845 while (ii > level) {
2846 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002847 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002848 if ((team->t.t_serialized) && (!dd)) {
2849 team = team->t.t_parent;
2850 continue;
2851 }
2852 if (ii > level) {
2853 team = team->t.t_parent;
2854 dd = team->t.t_serialized;
2855 ii--;
2856 }
2857 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002858
Jonathan Peyton30419822017-05-12 18:01:32 +00002859 return (dd > 1) ? (0) : (team->t.t_master_tid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002860}
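// Worked sketch of the walk above (hypothetical nesting, no teams construct):
//
//   #pragma omp parallel num_threads(2)   // t_level == 1
//     #pragma omp parallel num_threads(3) // t_level == 2
//
// A worker in the inner team asking for level == 2 returns its own tid
// (ii == level). For level == 1 the while loop is skipped (ii == level + 1
// after level++) and the inner team's t_master_tid is returned: the index of
// the caller's master within the enclosing team. Larger gaps are closed by
// climbing t_parent, with serialized levels consumed through dd.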
2861
Jonathan Peyton30419822017-05-12 18:01:32 +00002862int __kmp_get_team_size(int gtid, int level) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002863
Jonathan Peyton30419822017-05-12 18:01:32 +00002864 int ii, dd;
2865 kmp_team_t *team;
2866 kmp_info_t *thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002867
Jonathan Peyton30419822017-05-12 18:01:32 +00002868 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
2869 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002870
Jonathan Peyton30419822017-05-12 18:01:32 +00002871 // validate level
2872 if (level == 0)
2873 return 1;
2874 if (level < 0)
2875 return -1;
2876 thr = __kmp_threads[gtid];
2877 team = thr->th.th_team;
2878 ii = team->t.t_level;
2879 if (level > ii)
2880 return -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002881
2882#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00002883 if (thr->th.th_teams_microtask) {
2884 // AC: we are in a teams region where multiple nested teams have the same level
2885 int tlevel = thr->th.th_teams_level; // the level of the teams construct
2886 if (level <=
2887 tlevel) { // otherwise usual algorithm works (will not touch the teams)
2888 KMP_DEBUG_ASSERT(ii >= tlevel);
2889 // AC: As we need to pass through the teams league, we need to
2890 // artificially increase ii
2891 if (ii == tlevel) {
2892 ii += 2; // three teams have same level
2893 } else {
2894 ii++; // two teams have same level
2895 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002896 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002897 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002898#endif
2899
Jonathan Peyton30419822017-05-12 18:01:32 +00002900 while (ii > level) {
2901 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002902 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002903 if (team->t.t_serialized && (!dd)) {
2904 team = team->t.t_parent;
2905 continue;
2906 }
2907 if (ii > level) {
2908 team = team->t.t_parent;
2909 ii--;
2910 }
2911 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002912
Jonathan Peyton30419822017-05-12 18:01:32 +00002913 return team->t.t_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002914}
2915
Jonathan Peyton30419822017-05-12 18:01:32 +00002916kmp_r_sched_t __kmp_get_schedule_global() {
2917 // This routine was created because the pairs (__kmp_sched, __kmp_chunk) and
2918 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
2919 // independently, so the updated schedule can be obtained here.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002920
Jonathan Peyton30419822017-05-12 18:01:32 +00002921 kmp_r_sched_t r_sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002922
Jonathan Peyton30419822017-05-12 18:01:32 +00002923 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
2924 // __kmp_guided. __kmp_sched should keep original value, so that user can set
2925 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
2926 // different roots (even in OMP 2.5)
2927 if (__kmp_sched == kmp_sch_static) {
2928 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed
2929 // schedule (balanced or greedy)
2930 } else if (__kmp_sched == kmp_sch_guided_chunked) {
2931 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed
2932 // schedule (iterative or analytical)
2933 } else {
2934 r_sched.r_sched_type =
2935 __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
2936 }
2937
2938 if (__kmp_chunk < KMP_DEFAULT_CHUNK) { // __kmp_chunk may be wrong here (if it
2939 // was not ever set)
2940 r_sched.chunk = KMP_DEFAULT_CHUNK;
2941 } else {
2942 r_sched.chunk = __kmp_chunk;
2943 }
2944
2945 return r_sched;
2946}
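// Illustrative sketch (environment values hypothetical): if KMP_SCHEDULE
// made __kmp_static equal to kmp_sch_static_greedy, then with
// __kmp_sched == kmp_sch_static the routine above reports
//   r_sched.r_sched_type == kmp_sch_static_greedy
// while __kmp_sched itself keeps its original value for later re-reads.
// An unset __kmp_chunk (< KMP_DEFAULT_CHUNK) is reported as KMP_DEFAULT_CHUNK.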
2947
2948/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
2949 at least argc number of *t_argv entries for the requested team. */
2950static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
2951
2952 KMP_DEBUG_ASSERT(team);
2953 if (!realloc || argc > team->t.t_max_argc) {
2954
2955 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
2956 "current entries=%d\n",
2957 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
2958 /* if previously allocated heap space for args, free them */
2959 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
2960 __kmp_free((void *)team->t.t_argv);
2961
2962 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
2963 /* use unused space in the cache line for arguments */
2964 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2965 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
2966 "argv entries\n",
2967 team->t.t_id, team->t.t_max_argc));
2968 team->t.t_argv = &team->t.t_inline_argv[0];
2969 if (__kmp_storage_map) {
2970 __kmp_print_storage_map_gtid(
2971 -1, &team->t.t_inline_argv[0],
2972 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2973 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
2974 team->t.t_id);
2975 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002976 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00002977 /* allocate space for arguments in the heap */
2978 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
2979 ? KMP_MIN_MALLOC_ARGV_ENTRIES
2980 : 2 * argc;
2981 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
2982 "argv entries\n",
2983 team->t.t_id, team->t.t_max_argc));
2984 team->t.t_argv =
2985 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
2986 if (__kmp_storage_map) {
2987 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
2988 &team->t.t_argv[team->t.t_max_argc],
2989 sizeof(void *) * team->t.t_max_argc,
2990 "team_%d.t_argv", team->t.t_id);
2991 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002992 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002993 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002994}
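// Illustrative summary of the sizing policy above (thresholds are the build's
// KMP_INLINE_ARGV_ENTRIES / KMP_MIN_MALLOC_ARGV_ENTRIES):
//   argc <= KMP_INLINE_ARGV_ENTRIES
//     -> t_argv aliases t_inline_argv; no heap allocation
//   KMP_INLINE_ARGV_ENTRIES < argc <= KMP_MIN_MALLOC_ARGV_ENTRIES / 2
//     -> heap block of KMP_MIN_MALLOC_ARGV_ENTRIES entries
//   argc > KMP_MIN_MALLOC_ARGV_ENTRIES / 2
//     -> heap block of 2 * argc entries, leaving headroom for later reuse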
2995
Jonathan Peyton30419822017-05-12 18:01:32 +00002996static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
2997 int i;
2998 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
2999 team->t.t_threads =
3000 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3001 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3002 sizeof(dispatch_shared_info_t) * num_disp_buff);
3003 team->t.t_dispatch =
3004 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3005 team->t.t_implicit_task_taskdata =
3006 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3007 team->t.t_max_nproc = max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003008
Jonathan Peyton30419822017-05-12 18:01:32 +00003009 /* setup dispatch buffers */
3010 for (i = 0; i < num_disp_buff; ++i) {
3011 team->t.t_disp_buffer[i].buffer_index = i;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003012#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003013 team->t.t_disp_buffer[i].doacross_buf_idx = i;
Jonathan Peyton71909c52016-03-02 22:42:06 +00003014#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003015 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003016}
3017
Jonathan Peyton30419822017-05-12 18:01:32 +00003018static void __kmp_free_team_arrays(kmp_team_t *team) {
3019 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3020 int i;
3021 for (i = 0; i < team->t.t_max_nproc; ++i) {
3022 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3023 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3024 team->t.t_dispatch[i].th_disp_buffer = NULL;
3025 }; // if
3026 }; // for
3027 __kmp_free(team->t.t_threads);
3028 __kmp_free(team->t.t_disp_buffer);
3029 __kmp_free(team->t.t_dispatch);
3030 __kmp_free(team->t.t_implicit_task_taskdata);
3031 team->t.t_threads = NULL;
3032 team->t.t_disp_buffer = NULL;
3033 team->t.t_dispatch = NULL;
3034 team->t.t_implicit_task_taskdata = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003035}
3036
Jonathan Peyton30419822017-05-12 18:01:32 +00003037static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3038 kmp_info_t **oldThreads = team->t.t_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003039
Jonathan Peyton30419822017-05-12 18:01:32 +00003040 __kmp_free(team->t.t_disp_buffer);
3041 __kmp_free(team->t.t_dispatch);
3042 __kmp_free(team->t.t_implicit_task_taskdata);
3043 __kmp_allocate_team_arrays(team, max_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003044
Jonathan Peyton30419822017-05-12 18:01:32 +00003045 KMP_MEMCPY(team->t.t_threads, oldThreads,
3046 team->t.t_nproc * sizeof(kmp_info_t *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003047
Jonathan Peyton30419822017-05-12 18:01:32 +00003048 __kmp_free(oldThreads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003049}
3050
Jonathan Peyton30419822017-05-12 18:01:32 +00003051static kmp_internal_control_t __kmp_get_global_icvs(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003052
Jonathan Peyton30419822017-05-12 18:01:32 +00003053 kmp_r_sched_t r_sched =
3054 __kmp_get_schedule_global(); // get current state of scheduling globals
Jim Cownie5e8470a2013-09-27 10:38:44 +00003055
3056#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003057 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003058#endif /* OMP_40_ENABLED */
3059
Jonathan Peyton30419822017-05-12 18:01:32 +00003060 kmp_internal_control_t g_icvs = {
3061 0, // int serial_nesting_level; //corresponds to value of th_team_serialized
3062 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
3063 // for nested parallelism (per thread)
3064 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
3065 // adjustment of threads (per thread)
3066 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
3067 // whether blocktime is explicitly set
3068 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003069#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00003070 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
3071// intervals
Jonathan Peytone1c7c132016-10-07 18:12:19 +00003072#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003073 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
3074 // next parallel region (per thread)
3075 // (use a max ub on value if __kmp_parallel_initialize not called yet)
3076 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
3077 // for max_active_levels
3078 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
3079// {sched,chunk} pair
Jim Cownie5e8470a2013-09-27 10:38:44 +00003080#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003081 __kmp_nested_proc_bind.bind_types[0],
3082 __kmp_default_device,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003083#endif /* OMP_40_ENABLED */
Jonathan Peyton30419822017-05-12 18:01:32 +00003084 NULL // struct kmp_internal_control *next;
3085 };
Jim Cownie5e8470a2013-09-27 10:38:44 +00003086
Jonathan Peyton30419822017-05-12 18:01:32 +00003087 return g_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003088}
3089
Jonathan Peyton30419822017-05-12 18:01:32 +00003090static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003091
Jonathan Peyton30419822017-05-12 18:01:32 +00003092 kmp_internal_control_t gx_icvs;
3093 gx_icvs.serial_nesting_level =
3094 0; // probably = team->t.t_serialized, as in __kmp_save_internal_controls
3095 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3096 gx_icvs.next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003097
Jonathan Peyton30419822017-05-12 18:01:32 +00003098 return gx_icvs;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003099}
3100
Jonathan Peyton30419822017-05-12 18:01:32 +00003101static void __kmp_initialize_root(kmp_root_t *root) {
3102 int f;
3103 kmp_team_t *root_team;
3104 kmp_team_t *hot_team;
3105 int hot_team_max_nth;
3106 kmp_r_sched_t r_sched =
3107 __kmp_get_schedule_global(); // get current state of scheduling globals
3108 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3109 KMP_DEBUG_ASSERT(root);
3110 KMP_ASSERT(!root->r.r_begin);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003111
Jonathan Peyton30419822017-05-12 18:01:32 +00003112 /* setup the root state structure */
3113 __kmp_init_lock(&root->r.r_begin_lock);
3114 root->r.r_begin = FALSE;
3115 root->r.r_active = FALSE;
3116 root->r.r_in_parallel = 0;
3117 root->r.r_blocktime = __kmp_dflt_blocktime;
3118 root->r.r_nested = __kmp_dflt_nested;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003119
Jonathan Peyton30419822017-05-12 18:01:32 +00003120 /* setup the root team for this task */
3121 /* allocate the root team structure */
3122 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003123
Jonathan Peyton30419822017-05-12 18:01:32 +00003124 root_team =
3125 __kmp_allocate_team(root,
3126 1, // new_nproc
3127 1, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003128#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003129 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003130#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003131#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003132 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003133#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003134 &r_icvs,
3135 0 // argc
3136 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3137 );
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003138#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00003139 // Non-NULL value should be assigned to make the debugger display the root
3140 // team.
3141 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003142#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003143
Jonathan Peyton30419822017-05-12 18:01:32 +00003144 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003145
Jonathan Peyton30419822017-05-12 18:01:32 +00003146 root->r.r_root_team = root_team;
3147 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003148
Jonathan Peyton30419822017-05-12 18:01:32 +00003149 /* initialize root team */
3150 root_team->t.t_threads[0] = NULL;
3151 root_team->t.t_nproc = 1;
3152 root_team->t.t_serialized = 1;
3153 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3154 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3155 root_team->t.t_sched.chunk = r_sched.chunk;
3156 KA_TRACE(
3157 20,
3158 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3159 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003160
Jonathan Peyton30419822017-05-12 18:01:32 +00003161 /* setup the hot team for this task */
3162 /* allocate the hot team structure */
3163 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003164
Jonathan Peyton30419822017-05-12 18:01:32 +00003165 hot_team =
3166 __kmp_allocate_team(root,
3167 1, // new_nproc
3168 __kmp_dflt_team_nth_ub * 2, // max_nproc
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003169#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003170 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003171#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003172#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003173 __kmp_nested_proc_bind.bind_types[0],
Jim Cownie5e8470a2013-09-27 10:38:44 +00003174#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003175 &r_icvs,
3176 0 // argc
3177 USE_NESTED_HOT_ARG(NULL) // master thread is unknown
3178 );
3179 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003180
Jonathan Peyton30419822017-05-12 18:01:32 +00003181 root->r.r_hot_team = hot_team;
3182 root_team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003183
Jonathan Peyton30419822017-05-12 18:01:32 +00003184 /* first-time initialization */
3185 hot_team->t.t_parent = root_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003186
Jonathan Peyton30419822017-05-12 18:01:32 +00003187 /* initialize hot team */
3188 hot_team_max_nth = hot_team->t.t_max_nproc;
3189 for (f = 0; f < hot_team_max_nth; ++f) {
3190 hot_team->t.t_threads[f] = NULL;
3191 }; // for
3192 hot_team->t.t_nproc = 1;
3193 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
3194 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3195 hot_team->t.t_sched.chunk = r_sched.chunk;
3196 hot_team->t.t_size_changed = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003197}
3198
3199#ifdef KMP_DEBUG
3200
Jim Cownie5e8470a2013-09-27 10:38:44 +00003201typedef struct kmp_team_list_item {
Jonathan Peyton30419822017-05-12 18:01:32 +00003202 kmp_team_p const *entry;
3203 struct kmp_team_list_item *next;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003204} kmp_team_list_item_t;
Jonathan Peyton30419822017-05-12 18:01:32 +00003205typedef kmp_team_list_item_t *kmp_team_list_t;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003206
Jonathan Peyton30419822017-05-12 18:01:32 +00003207static void __kmp_print_structure_team_accum( // Add team to list of teams.
3208 kmp_team_list_t list, // List of teams.
3209 kmp_team_p const *team // Team to add.
3210 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003211
Jonathan Peyton30419822017-05-12 18:01:32 +00003212 // List must terminate with item where both entry and next are NULL.
3213 // Team is added to the list only once.
3214 // List is sorted in ascending order by team id.
3215 // Team id is *not* a key.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003216
Jonathan Peyton30419822017-05-12 18:01:32 +00003217 kmp_team_list_t l;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003218
Jonathan Peyton30419822017-05-12 18:01:32 +00003219 KMP_DEBUG_ASSERT(list != NULL);
3220 if (team == NULL) {
3221 return;
3222 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003223
Jonathan Peyton30419822017-05-12 18:01:32 +00003224 __kmp_print_structure_team_accum(list, team->t.t_parent);
3225 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003226
Jonathan Peyton30419822017-05-12 18:01:32 +00003227 // Search list for the team.
3228 l = list;
3229 while (l->next != NULL && l->entry != team) {
3230 l = l->next;
3231 }; // while
3232 if (l->next != NULL) {
3233 return; // Team has been added before, exit.
3234 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003235
Jonathan Peyton30419822017-05-12 18:01:32 +00003236 // Team is not found. Search list again for insertion point.
3237 l = list;
3238 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3239 l = l->next;
3240 }; // while
Jim Cownie5e8470a2013-09-27 10:38:44 +00003241
Jonathan Peyton30419822017-05-12 18:01:32 +00003242 // Insert team.
3243 {
3244 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3245 sizeof(kmp_team_list_item_t));
3246 *item = *l;
3247 l->entry = team;
3248 l->next = item;
3249 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003250}
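// Illustrative sketch of the sentinel-list insertion above (team ids
// hypothetical): starting from the terminator item {entry: NULL, next: NULL},
// accumulating teams with t_id 5, then 2, then 5 again yields
//   {2} -> {5} -> {NULL, NULL}
// The repeated 5 is dropped by the first search, and ascending t_id order is
// kept by copying the successor into a fresh item (*item = *l) at the
// insertion point.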
3251
Jonathan Peyton30419822017-05-12 18:01:32 +00003252static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
Jim Cownie5e8470a2013-09-27 10:38:44 +00003253
Jonathan Peyton30419822017-05-12 18:01:32 +00003254 ) {
3255 __kmp_printf("%s", title);
3256 if (team != NULL) {
3257 __kmp_printf("%2x %p\n", team->t.t_id, team);
3258 } else {
3259 __kmp_printf(" - (nil)\n");
3260 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003261}
3262
Jonathan Peyton30419822017-05-12 18:01:32 +00003263static void __kmp_print_structure_thread(char const *title,
3264 kmp_info_p const *thread) {
3265 __kmp_printf("%s", title);
3266 if (thread != NULL) {
3267 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3268 } else {
3269 __kmp_printf(" - (nil)\n");
3270 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003271}
3272
Jonathan Peyton30419822017-05-12 18:01:32 +00003273void __kmp_print_structure(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003274
Jonathan Peyton30419822017-05-12 18:01:32 +00003275 kmp_team_list_t list;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003276
Jonathan Peyton30419822017-05-12 18:01:32 +00003277 // Initialize list of teams.
3278 list =
3279 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3280 list->entry = NULL;
3281 list->next = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003282
Jonathan Peyton30419822017-05-12 18:01:32 +00003283 __kmp_printf("\n------------------------------\nGlobal Thread "
3284 "Table\n------------------------------\n");
3285 {
3286 int gtid;
3287 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3288 __kmp_printf("%2d", gtid);
3289 if (__kmp_threads != NULL) {
3290 __kmp_printf(" %p", __kmp_threads[gtid]);
3291 }; // if
3292 if (__kmp_root != NULL) {
3293 __kmp_printf(" %p", __kmp_root[gtid]);
3294 }; // if
3295 __kmp_printf("\n");
3296 }; // for gtid
3297 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003298
Jonathan Peyton30419822017-05-12 18:01:32 +00003299 // Print out __kmp_threads array.
3300 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3301 "----------\n");
3302 if (__kmp_threads != NULL) {
3303 int gtid;
3304 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3305 kmp_info_t const *thread = __kmp_threads[gtid];
3306 if (thread != NULL) {
3307 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3308 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3309 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3310 __kmp_print_structure_team(" Serial Team: ",
3311 thread->th.th_serial_team);
3312 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3313 __kmp_print_structure_thread(" Master: ",
3314 thread->th.th_team_master);
3315 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3316 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003317#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003318 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003319#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003320 __kmp_print_structure_thread(" Next in pool: ",
3321 thread->th.th_next_pool);
3322 __kmp_printf("\n");
3323 __kmp_print_structure_team_accum(list, thread->th.th_team);
3324 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3325 }; // if
3326 }; // for gtid
3327 } else {
3328 __kmp_printf("Threads array is not allocated.\n");
3329 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003330
Jonathan Peyton30419822017-05-12 18:01:32 +00003331 // Print out __kmp_root array.
3332 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3333 "--------\n");
3334 if (__kmp_root != NULL) {
3335 int gtid;
3336 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3337 kmp_root_t const *root = __kmp_root[gtid];
3338 if (root != NULL) {
3339 __kmp_printf("GTID %2d %p:\n", gtid, root);
3340 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3341 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3342 __kmp_print_structure_thread(" Uber Thread: ",
3343 root->r.r_uber_thread);
3344 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3345 __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
3346 __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel);
3347 __kmp_printf("\n");
3348 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3349 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3350 }; // if
3351 }; // for gtid
3352 } else {
3353 __kmp_printf("Ubers array is not allocated.\n");
3354 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003355
Jonathan Peyton30419822017-05-12 18:01:32 +00003356 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3357 "--------\n");
3358 while (list->next != NULL) {
3359 kmp_team_p const *team = list->entry;
3360 int i;
3361 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3362 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3363 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid);
3364 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3365 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3366 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3367 for (i = 0; i < team->t.t_nproc; ++i) {
3368 __kmp_printf(" Thread %2d: ", i);
3369 __kmp_print_structure_thread("", team->t.t_threads[i]);
3370 }; // for i
3371 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3372 __kmp_printf("\n");
3373 list = list->next;
3374 }; // while
Jim Cownie5e8470a2013-09-27 10:38:44 +00003375
Jonathan Peyton30419822017-05-12 18:01:32 +00003376 // Print out __kmp_thread_pool and __kmp_team_pool.
3377 __kmp_printf("\n------------------------------\nPools\n----------------------"
3378 "--------\n");
3379 __kmp_print_structure_thread("Thread pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003380 CCAST(kmp_info_t *, __kmp_thread_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003381 __kmp_print_structure_team("Team pool: ",
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00003382 CCAST(kmp_team_t *, __kmp_team_pool));
Jonathan Peyton30419822017-05-12 18:01:32 +00003383 __kmp_printf("\n");
Jim Cownie5e8470a2013-09-27 10:38:44 +00003384
Jonathan Peyton30419822017-05-12 18:01:32 +00003385 // Free team list.
3386 while (list != NULL) {
3387 kmp_team_list_item_t *item = list;
3388 list = list->next;
3389 KMP_INTERNAL_FREE(item);
3390 }; // while
Jim Cownie5e8470a2013-09-27 10:38:44 +00003391}
3392
3393#endif
3394
Jim Cownie5e8470a2013-09-27 10:38:44 +00003395//---------------------------------------------------------------------------
3396// Stuff for per-thread fast random number generator
3397// Table of primes
Jim Cownie5e8470a2013-09-27 10:38:44 +00003398static const unsigned __kmp_primes[] = {
Jonathan Peyton30419822017-05-12 18:01:32 +00003399 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3400 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3401 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3402 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3403 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3404 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3405 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3406 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3407 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3408 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3409 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
Jim Cownie5e8470a2013-09-27 10:38:44 +00003410
3411//---------------------------------------------------------------------------
3412// __kmp_get_random: Get a random number using a linear congruential method.
Jonathan Peyton30419822017-05-12 18:01:32 +00003413unsigned short __kmp_get_random(kmp_info_t *thread) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003414 unsigned x = thread->th.th_x;
Jonathan Peyton30419822017-05-12 18:01:32 +00003415 unsigned short r = x >> 16;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003416
Jonathan Peyton30419822017-05-12 18:01:32 +00003417 thread->th.th_x = x * thread->th.th_a + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003418
3419 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
Jonathan Peyton30419822017-05-12 18:01:32 +00003420 thread->th.th_info.ds.ds_tid, r));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003421
3422 return r;
3423}
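// Worked sketch of the recurrence above (state values hypothetical): with
// th_a == 0x9e3779b1 and th_x == 1,
//   call 1: returns 1 >> 16 == 0;      th_x becomes 0x9e3779b1 * 1 + 1
//   call 2: returns 0x9e3779b2 >> 16 == 0x9e37; the state advances again
// Only the high 16 bits are returned: in a modulus-2^32 linear congruential
// generator the low-order bits have short periods.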
3424//--------------------------------------------------------
3425// __kmp_init_random: Initialize a random number generator
Jonathan Peyton30419822017-05-12 18:01:32 +00003426void __kmp_init_random(kmp_info_t *thread) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00003427 unsigned seed = thread->th.th_info.ds.ds_tid;
3428
Jonathan Peyton30419822017-05-12 18:01:32 +00003429 thread->th.th_a =
3430 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3431 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3432 KA_TRACE(30,
3433 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003434}
3435
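// A hedged, standalone sketch of the generator above -- ours, not the
// runtime's API: each thread gets its own multiplier from a table of
// primes, keyed by its tid, and advances x with the linear congruential
// step x = x * a + 1 (modulo 2^32), returning the high 16 bits, which are
// the best-mixed bits of the product. Names prefixed "example_" are
// illustrative placeholders.
//
//   static const unsigned example_primes[] = {0x9e3779b1, 0xffe6cc59,
//                                             0x2109f6dd};
//   typedef struct { unsigned x, a; } example_rng_t;
//   static void example_rng_init(example_rng_t *r, unsigned tid) {
//     r->a = example_primes[tid % (sizeof(example_primes) /
//                                  sizeof(example_primes[0]))];
//     r->x = (tid + 1) * r->a + 1; // seed depends on tid and multiplier
//   }
//   static unsigned short example_rng_next(example_rng_t *r) {
//     unsigned short out = (unsigned short)(r->x >> 16); // top 16 bits
//     r->x = r->x * r->a + 1; // LCG step; modulus 2^32 is implicit
//     return out;
//   }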
Jim Cownie5e8470a2013-09-27 10:38:44 +00003436#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00003437/* reclaim array entries for root threads that are already dead, returns number
3438 * reclaimed */
3439static int __kmp_reclaim_dead_roots(void) {
3440 int i, r = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003441
Jonathan Peyton30419822017-05-12 18:01:32 +00003442 for (i = 0; i < __kmp_threads_capacity; ++i) {
3443 if (KMP_UBER_GTID(i) &&
3444 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3445 !__kmp_root[i]
3446 ->r.r_active) { // AC: reclaim only roots that died in a non-active state
3447 r += __kmp_unregister_root_other_thread(i);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003448 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003449 }
3450 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003451}
3452#endif
3453
Jonathan Peyton30419822017-05-12 18:01:32 +00003454/* This function attempts to create free entries in __kmp_threads and
3455 __kmp_root, and returns the number of free entries generated.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003456
Jonathan Peyton30419822017-05-12 18:01:32 +00003457 For Windows* OS static library, the first mechanism used is to reclaim array
3458 entries for root threads that are already dead.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003459
Jonathan Peyton30419822017-05-12 18:01:32 +00003460 On all platforms, expansion is attempted on the arrays __kmp_threads and
3461 __kmp_root, with appropriate update to __kmp_threads_capacity. Array
3462 capacity is increased by doubling with clipping to __kmp_tp_capacity, if
3463 threadprivate cache array has been created. Synchronization with
3464 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003465
Jonathan Peyton30419822017-05-12 18:01:32 +00003466 After any dead root reclamation, if the clipping value allows array expansion
3467 to result in the generation of a total of nWish free slots, the function does
3468 that expansion. If not, but the clipping value allows array expansion to
3469 result in the generation of a total of nNeed free slots, the function does
3470 that expansion. Otherwise, nothing is done beyond the possible initial root
3471 thread reclamation. However, if nNeed is zero, a best-effort attempt is made
3472 to fulfil nWish as far as possible, i.e. the function will attempt to create
Jim Cownie5e8470a2013-09-27 10:38:44 +00003473 as many free slots as possible up to nWish.
3474
Jonathan Peyton30419822017-05-12 18:01:32 +00003475 If any argument is negative, the behavior is undefined. */
3476static int __kmp_expand_threads(int nWish, int nNeed) {
3477 int added = 0;
3478 int old_tp_cached;
3479 int __kmp_actual_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003480
Jonathan Peyton30419822017-05-12 18:01:32 +00003481 if (nNeed > nWish) /* normalize the arguments */
3482 nWish = nNeed;
Jonathan Peyton99016992015-05-26 17:32:53 +00003483#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
Jonathan Peyton30419822017-05-12 18:01:32 +00003484 /* only for Windows static library */
3485 /* reclaim array entries for root threads that are already dead */
3486 added = __kmp_reclaim_dead_roots();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003487
Jonathan Peyton30419822017-05-12 18:01:32 +00003488 if (nNeed) {
3489 nNeed -= added;
3490 if (nNeed < 0)
3491 nNeed = 0;
3492 }
3493 if (nWish) {
3494 nWish -= added;
3495 if (nWish < 0)
3496 nWish = 0;
3497 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003498#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003499 if (nWish <= 0)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003500 return added;
Jonathan Peyton30419822017-05-12 18:01:32 +00003501
3502 while (1) {
3503 int nTarget;
3504 int minimumRequiredCapacity;
3505 int newCapacity;
3506 kmp_info_t **newThreads;
3507 kmp_root_t **newRoot;
3508
3509 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
3510 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
3511 // user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become
3512 // > __kmp_max_nth in one of two ways:
3513 //
3514 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
3515 // may not be reused by another thread, so we may need to increase
3516 // __kmp_threads_capacity to __kmp_max_nth + 1.
3517 //
3518 // 2) New foreign root(s) are encountered. We always register new foreign
3519 // roots. This may cause a smaller # of threads to be allocated at
3520 // subsequent parallel regions, but the worker threads hang around (and
3521 // eventually go to sleep) and need slots in the __kmp_threads[] array.
3522 //
3523 // Anyway, that is the reason for moving the check to see if
3524 // __kmp_max_nth was exceeded into __kmp_reserve_threads()
3525 // instead of having it performed here. -BB
3526 old_tp_cached = __kmp_tp_cached;
3527 __kmp_actual_max_nth =
3528 old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3529 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3530
3531 /* compute expansion headroom to check if we can expand and whether to aim
3532 for nWish or nNeed */
3533 nTarget = nWish;
3534 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3535 /* can't fulfil nWish, so try nNeed */
3536 if (nNeed) {
3537 nTarget = nNeed;
3538 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3539 /* possible expansion too small -- give up */
3540 break;
3541 }
3542 } else {
3543 /* best-effort */
3544 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3545 if (!nTarget) {
3546 /* cannot expand at all -- give up */
3547 break;
3548 }
3549 }
3550 }
3551 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3552
3553 newCapacity = __kmp_threads_capacity;
3554 do {
3555 newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1)
3556 ? (newCapacity << 1)
3557 : __kmp_actual_max_nth;
3558 } while (newCapacity < minimumRequiredCapacity);
3559 newThreads = (kmp_info_t **)__kmp_allocate(
3560 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity +
3561 CACHE_LINE);
3562 newRoot = (kmp_root_t **)((char *)newThreads +
3563 sizeof(kmp_info_t *) * newCapacity);
3564 KMP_MEMCPY(newThreads, __kmp_threads,
3565 __kmp_threads_capacity * sizeof(kmp_info_t *));
3566 KMP_MEMCPY(newRoot, __kmp_root,
3567 __kmp_threads_capacity * sizeof(kmp_root_t *));
3568 memset(newThreads + __kmp_threads_capacity, 0,
3569 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *));
3570 memset(newRoot + __kmp_threads_capacity, 0,
3571 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *));
3572
3573 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3574 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has
3575 allocated a threadprivate cache while we were allocating the expanded
3576 array, and our new capacity is larger than the threadprivate cache
3577 capacity, so we should deallocate the expanded arrays and try again.
3578 This is the first check of a double-check pair. */
3579 __kmp_free(newThreads);
3580 continue; /* start over and try again */
3581 }
3582 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3583 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3584 /* Same check as above, but this time with the lock so we can be sure if
3585 we can succeed. */
3586 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3587 __kmp_free(newThreads);
3588 continue; /* start over and try again */
3589 } else {
3590 /* success */
3591 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be
3592 // investigated.
3593 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3594 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3595 added += newCapacity - __kmp_threads_capacity;
3596 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3597 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3598 break; /* succeeded, so we can exit the loop */
3599 }
3600 }
3601 return added;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003602}
3603
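// A minimal sketch of the doubling-with-clipping growth rule implemented
// above. It assumes, as the real code checks beforehand, that current > 0
// and required <= max_nth; all names here are illustrative, not part of
// the runtime.
//
//   static int example_grow_capacity(int current, int required,
//                                    int max_nth) {
//     int cap = current;
//     do { // double until large enough, clipping at the hard maximum
//       cap = (cap <= (max_nth >> 1)) ? (cap << 1) : max_nth;
//     } while (cap < required); // terminates because required <= max_nth
//     return cap;
//   }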
Jonathan Peyton30419822017-05-12 18:01:32 +00003604/* Register the current thread as a root thread and obtain our gtid. We must
3605 have the __kmp_initz_lock held at this point. Argument TRUE only if are the
3606 thread that calls from __kmp_do_serial_initialize() */
3607int __kmp_register_root(int initial_thread) {
3608 kmp_info_t *root_thread;
3609 kmp_root_t *root;
3610 int gtid;
3611 int capacity;
3612 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3613 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3614 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003615
Jonathan Peyton30419822017-05-12 18:01:32 +00003616 /* 2007-03-02:
3617 If initial thread did not invoke OpenMP RTL yet, and this thread is not an
3618 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
3619 work as expected -- it may return false (that means there is at least one
3620 empty slot in __kmp_threads array), but it is possible the only free slot
3621 is #0, which is reserved for initial thread and so cannot be used for this
3622 one. The following code works around this bug.
Jim Cownie5e8470a2013-09-27 10:38:44 +00003623
Jonathan Peyton30419822017-05-12 18:01:32 +00003624 However, the right solution seems to be not reserving slot #0 for the
3625 initial thread, because:
3626 (1) there is no magic in slot #0,
3627 (2) we cannot detect the initial thread reliably (the first thread that
3628 does serial initialization may not be the real initial thread).
3629 */
3630 capacity = __kmp_threads_capacity;
3631 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3632 --capacity;
3633 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00003634
Jonathan Peyton30419822017-05-12 18:01:32 +00003635 /* see if there are too many threads */
3636 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) {
3637 if (__kmp_tp_cached) {
3638 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
3639 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3640 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3641 } else {
3642 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
3643 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003644 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003645 }; // if
3646
3647 /* find an available thread slot */
3648 /* Don't reassign the zero slot since we need that to only be used by initial
3649 thread */
3650 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3651 gtid++)
3652 ;
3653 KA_TRACE(1,
3654 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3655 KMP_ASSERT(gtid < __kmp_threads_capacity);
3656
3657 /* update global accounting */
3658 __kmp_all_nth++;
3659 TCW_4(__kmp_nth, __kmp_nth + 1);
3660
3661 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
3662 // numbers of procs, and method #2 (keyed API call) for higher numbers.
3663 if (__kmp_adjust_gtid_mode) {
3664 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3665 if (TCR_4(__kmp_gtid_mode) != 2) {
3666 TCW_4(__kmp_gtid_mode, 2);
3667 }
3668 } else {
3669 if (TCR_4(__kmp_gtid_mode) != 1) {
3670 TCW_4(__kmp_gtid_mode, 1);
3671 }
3672 }
3673 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003674
3675#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00003676 /* Adjust blocktime to zero if necessary */
3677 /* Middle initialization might not have occurred yet */
3678 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3679 if (__kmp_nth > __kmp_avail_proc) {
3680 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003681 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003682 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003683#endif /* KMP_ADJUST_BLOCKTIME */
3684
Jonathan Peyton30419822017-05-12 18:01:32 +00003685 /* setup this new hierarchy */
3686 if (!(root = __kmp_root[gtid])) {
3687 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3688 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3689 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003690
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003691#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003692 // Initialize stats as soon as possible (right after gtid assignment).
3693 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3694 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3695 KMP_SET_THREAD_STATE(SERIAL_REGION);
3696 KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
Jonathan Peyton5375fe82016-11-14 21:13:44 +00003697#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003698 __kmp_initialize_root(root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003699
Jonathan Peyton30419822017-05-12 18:01:32 +00003700 /* setup new root thread structure */
3701 if (root->r.r_uber_thread) {
3702 root_thread = root->r.r_uber_thread;
3703 } else {
3704 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3705 if (__kmp_storage_map) {
3706 __kmp_print_thread_storage_map(root_thread, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003707 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003708 root_thread->th.th_info.ds.ds_gtid = gtid;
3709 root_thread->th.th_root = root;
3710 if (__kmp_env_consistency_check) {
3711 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3712 }
3713#if USE_FAST_MEMORY
3714 __kmp_initialize_fast_memory(root_thread);
3715#endif /* USE_FAST_MEMORY */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716
Jonathan Peyton30419822017-05-12 18:01:32 +00003717#if KMP_USE_BGET
3718 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3719 __kmp_initialize_bget(root_thread);
3720#endif
3721 __kmp_init_random(root_thread); // Initialize random number generator
3722 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003723
Jonathan Peyton30419822017-05-12 18:01:32 +00003724 /* setup the serial team held in reserve by the root thread */
3725 if (!root_thread->th.th_serial_team) {
3726 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3727 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3728 root_thread->th.th_serial_team =
3729 __kmp_allocate_team(root, 1, 1,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003730#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003731 0, // root parallel id
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003732#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003733#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003734 proc_bind_default,
Jim Cownie5e8470a2013-09-27 10:38:44 +00003735#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003736 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3737 }
3738 KMP_ASSERT(root_thread->th.th_serial_team);
3739 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3740 root_thread->th.th_serial_team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741
Jonathan Peyton30419822017-05-12 18:01:32 +00003742 /* drop root_thread into place */
3743 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003744
Jonathan Peyton30419822017-05-12 18:01:32 +00003745 root->r.r_root_team->t.t_threads[0] = root_thread;
3746 root->r.r_hot_team->t.t_threads[0] = root_thread;
3747 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3748 // AC: the team created in reserve, not for execution (it is unused for now).
3749 root_thread->th.th_serial_team->t.t_serialized = 0;
3750 root->r.r_uber_thread = root_thread;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003751
Jonathan Peyton30419822017-05-12 18:01:32 +00003752 /* initialize the thread, get it ready to go */
3753 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3754 TCW_4(__kmp_init_gtid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003755
Jonathan Peyton30419822017-05-12 18:01:32 +00003756 /* prepare the master thread for get_gtid() */
3757 __kmp_gtid_set_specific(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003758
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003759#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00003760 __kmp_itt_thread_name(gtid);
Jonathan Peyton7abf9d52016-05-26 18:19:10 +00003761#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003762
Jonathan Peyton30419822017-05-12 18:01:32 +00003763#ifdef KMP_TDATA_GTID
3764 __kmp_gtid = gtid;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00003765#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003766 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3767 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3768
3769 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3770 "plain=%u\n",
3771 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3772 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3773 KMP_INIT_BARRIER_STATE));
3774 { // Initialize barrier data.
3775 int b;
3776 for (b = 0; b < bs_last_barrier; ++b) {
3777 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3778#if USE_DEBUGGER
3779 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3780#endif
3781 }; // for
3782 }
3783 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3784 KMP_INIT_BARRIER_STATE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003785
Alp Toker763b9392014-02-28 09:42:41 +00003786#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00003787#if OMP_40_ENABLED
3788 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3789 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3790 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3791 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3792#endif
Jonathan Peyton2f7c0772016-02-25 18:49:52 +00003793
Jonathan Peyton30419822017-05-12 18:01:32 +00003794 if (TCR_4(__kmp_init_middle)) {
3795 __kmp_affinity_set_init_mask(gtid, TRUE);
3796 }
Alp Toker763b9392014-02-28 09:42:41 +00003797#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003798
Jonathan Peyton30419822017-05-12 18:01:32 +00003799 __kmp_root_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003800
Jonathan Peyton30419822017-05-12 18:01:32 +00003801 KMP_MB();
3802 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003803
Jonathan Peyton30419822017-05-12 18:01:32 +00003804 return gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003805}
3806
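// An illustrative sketch (ours, not the runtime's) of the slot-search
// convention used above: slot 0 is reserved for the initial thread, so a
// foreign root starts scanning at index 1 and claims the first NULL
// entry. The caller is expected to have guaranteed that a free slot
// exists, e.g. by expanding the arrays first.
//
//   static int example_find_root_slot(void *const *threads, int capacity,
//                                     int initial_thread) {
//     for (int gtid = initial_thread ? 0 : 1; gtid < capacity; ++gtid)
//       if (threads[gtid] == NULL)
//         return gtid; // becomes the new root's global thread id
//     return -1; // unreachable if the caller reserved headroom first
//   }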
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003807#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003808static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3809 const int max_level) {
3810 int i, n, nth;
3811 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3812 if (!hot_teams || !hot_teams[level].hot_team) {
3813 return 0;
3814 }
3815 KMP_DEBUG_ASSERT(level < max_level);
3816 kmp_team_t *team = hot_teams[level].hot_team;
3817 nth = hot_teams[level].hot_team_nth;
3818 n = nth - 1; // master is not freed
3819 if (level < max_level - 1) {
3820 for (i = 0; i < nth; ++i) {
3821 kmp_info_t *th = team->t.t_threads[i];
3822 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3823 if (i > 0 && th->th.th_hot_teams) {
3824 __kmp_free(th->th.th_hot_teams);
3825 th->th.th_hot_teams = NULL;
3826 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003827 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003828 }
3829 __kmp_free_team(root, team, NULL);
3830 return n;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003831}
3832#endif
3833
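// A hedged sketch of the recursive teardown shape used by
// __kmp_free_hot_teams: deeper levels are freed first, and the return
// value accumulates how many thread slots were released, never counting
// a master. A uniform team size per level is assumed purely for
// illustration.
//
//   static int example_free_levels(int level, int max_level, int nth) {
//     if (level >= max_level)
//       return 0;
//     int freed = nth - 1; // workers at this level; master is not freed
//     if (level < max_level - 1)
//       for (int i = 0; i < nth; ++i)
//         freed += example_free_levels(level + 1, max_level, nth);
//     return freed;
//   }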
Jonathan Peyton30419822017-05-12 18:01:32 +00003834// Resets a root thread and clears its root and hot teams.
3835// Returns the number of __kmp_threads entries directly and indirectly freed.
3836static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3837 kmp_team_t *root_team = root->r.r_root_team;
3838 kmp_team_t *hot_team = root->r.r_hot_team;
3839 int n = hot_team->t.t_nproc;
3840 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003841
Jonathan Peyton30419822017-05-12 18:01:32 +00003842 KMP_DEBUG_ASSERT(!root->r.r_active);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003843
Jonathan Peyton30419822017-05-12 18:01:32 +00003844 root->r.r_root_team = NULL;
3845 root->r.r_hot_team = NULL;
3846 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
3847 // before the call to __kmp_free_team().
3848 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003849#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00003850 if (__kmp_hot_teams_max_level >
3851 0) { // need to free nested hot teams and their threads if any
3852 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3853 kmp_info_t *th = hot_team->t.t_threads[i];
3854 if (__kmp_hot_teams_max_level > 1) {
3855 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3856 }
3857 if (th->th.th_hot_teams) {
3858 __kmp_free(th->th.th_hot_teams);
3859 th->th.th_hot_teams = NULL;
3860 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003861 }
Jonathan Peyton30419822017-05-12 18:01:32 +00003862 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003863#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003864 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003865
Jonathan Peyton30419822017-05-12 18:01:32 +00003866 // Before we can reap the thread, we need to make certain that all other
3867 // threads in the teams that had this root as ancestor have stopped trying to
3868 // steal tasks.
3869 if (__kmp_tasking_mode != tskm_immediate_exec) {
3870 __kmp_wait_to_unref_task_teams();
3871 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003872
Jonathan Peyton30419822017-05-12 18:01:32 +00003873#if KMP_OS_WINDOWS
3874 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
3875 KA_TRACE(
3876 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3877 "\n",
3878 (LPVOID) & (root->r.r_uber_thread->th),
3879 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3880 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3881#endif /* KMP_OS_WINDOWS */
Jim Cownie5e8470a2013-09-27 10:38:44 +00003882
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003883#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003884 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3885 int gtid = __kmp_get_gtid();
3886 __ompt_thread_end(ompt_thread_initial, gtid);
3887 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00003888#endif
3889
Jonathan Peyton30419822017-05-12 18:01:32 +00003890 TCW_4(__kmp_nth,
3891 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
3892 __kmp_reap_thread(root->r.r_uber_thread, 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003893
Jonathan Peyton30419822017-05-12 18:01:32 +00003894 // We cannot put the root thread into __kmp_thread_pool, so we have to reap
3895 // it instead of freeing it.
3896 root->r.r_uber_thread = NULL;
3897 /* mark root as no longer in use */
3898 root->r.r_begin = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003899
Jonathan Peyton30419822017-05-12 18:01:32 +00003900 return n;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003901}
3902
Jonathan Peyton30419822017-05-12 18:01:32 +00003903void __kmp_unregister_root_current_thread(int gtid) {
3904 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3905 /* this lock should be ok, since unregister_root_current_thread is never
3906 called during an abort, only during a normal close. furthermore, if you
3907 have the forkjoin lock, you should never try to get the initz lock */
3908 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3909 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3910 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
3911 "exiting T#%d\n",
3912 gtid));
3913 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3914 return;
3915 }
3916 kmp_root_t *root = __kmp_root[gtid];
Jim Cownie77c2a632014-09-03 11:34:33 +00003917
Jonathan Peyton30419822017-05-12 18:01:32 +00003918 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3919 KMP_ASSERT(KMP_UBER_GTID(gtid));
3920 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3921 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003922
Jonathan Peyton30419822017-05-12 18:01:32 +00003923 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00003924
Jonathan Peytondf6818b2016-06-14 17:57:47 +00003925#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00003926 kmp_info_t *thread = __kmp_threads[gtid];
3927 kmp_team_t *team = thread->th.th_team;
3928 kmp_task_team_t *task_team = thread->th.th_task_team;
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003929
Jonathan Peyton30419822017-05-12 18:01:32 +00003930 // we need to wait for the proxy tasks before finishing the thread
3931 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003932#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00003933 // the runtime is shutting down so we won't report any events
3934 thread->th.ompt_thread_info.state = ompt_state_undefined;
Jonathan Peyton6d247f72015-09-10 21:33:50 +00003935#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00003936 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3937 }
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00003938#endif
3939
Jonathan Peyton30419822017-05-12 18:01:32 +00003940 __kmp_reset_root(gtid, root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003941
Jonathan Peyton30419822017-05-12 18:01:32 +00003942 /* free up this thread slot */
3943 __kmp_gtid_set_specific(KMP_GTID_DNE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003944#ifdef KMP_TDATA_GTID
Jonathan Peyton30419822017-05-12 18:01:32 +00003945 __kmp_gtid = KMP_GTID_DNE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003946#endif
3947
Jonathan Peyton30419822017-05-12 18:01:32 +00003948 KMP_MB();
3949 KC_TRACE(10,
3950 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00003951
Jonathan Peyton30419822017-05-12 18:01:32 +00003952 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003953}
3954
Jonathan Peyton2321d572015-06-08 19:25:25 +00003955#if KMP_OS_WINDOWS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003956/* __kmp_forkjoin_lock must be already held
Jonathan Peyton30419822017-05-12 18:01:32 +00003957 Unregisters a root thread that is not the current thread. Returns the number
3958 of __kmp_threads entries freed as a result. */
3959static int __kmp_unregister_root_other_thread(int gtid) {
3960 kmp_root_t *root = __kmp_root[gtid];
3961 int r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003962
Jonathan Peyton30419822017-05-12 18:01:32 +00003963 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
3964 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3965 KMP_ASSERT(KMP_UBER_GTID(gtid));
3966 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3967 KMP_ASSERT(root->r.r_active == FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003968
Jonathan Peyton30419822017-05-12 18:01:32 +00003969 r = __kmp_reset_root(gtid, root);
3970 KC_TRACE(10,
3971 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
3972 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003973}
Jonathan Peyton2321d572015-06-08 19:25:25 +00003974#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00003975
Jim Cownie5e8470a2013-09-27 10:38:44 +00003976#if KMP_DEBUG
3977void __kmp_task_info() {
3978
Jonathan Peyton30419822017-05-12 18:01:32 +00003979 kmp_int32 gtid = __kmp_entry_gtid();
3980 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
3981 kmp_info_t *this_thr = __kmp_threads[gtid];
3982 kmp_team_t *steam = this_thr->th.th_serial_team;
3983 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003984
Jonathan Peyton30419822017-05-12 18:01:32 +00003985 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
3986 "ptask=%p\n",
3987 gtid, tid, this_thr, team, this_thr->th.th_current_task,
3988 team->t.t_implicit_task_taskdata[tid].td_parent);
Jim Cownie5e8470a2013-09-27 10:38:44 +00003989}
3990#endif // KMP_DEBUG
3991
Jonathan Peyton30419822017-05-12 18:01:32 +00003992/* TODO optimize with one big memclr, take out what isn't needed, split
3993 responsibility to workers as much as possible, and delay initialization of
3994 features as much as possible */
3995static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
3996 int tid, int gtid) {
3997 /* this_thr->th.th_info.ds.ds_gtid is setup in
3998 kmp_allocate_thread/create_worker.
3999 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4000 kmp_info_t *master = team->t.t_threads[0];
4001 KMP_DEBUG_ASSERT(this_thr != NULL);
4002 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4003 KMP_DEBUG_ASSERT(team);
4004 KMP_DEBUG_ASSERT(team->t.t_threads);
4005 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4006 KMP_DEBUG_ASSERT(master);
4007 KMP_DEBUG_ASSERT(master->th.th_root);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004008
Jonathan Peyton30419822017-05-12 18:01:32 +00004009 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004010
Jonathan Peyton30419822017-05-12 18:01:32 +00004011 TCW_SYNC_PTR(this_thr->th.th_team, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004012
Jonathan Peyton30419822017-05-12 18:01:32 +00004013 this_thr->th.th_info.ds.ds_tid = tid;
4014 this_thr->th.th_set_nproc = 0;
4015 if (__kmp_tasking_mode != tskm_immediate_exec)
4016 // When tasking is possible, threads are not safe to reap until they are
4017 // done tasking; this will be set when tasking code is exited in wait
4018 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4019 else // no tasking --> always safe to reap
4020 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004021#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004022 this_thr->th.th_set_proc_bind = proc_bind_default;
4023#if KMP_AFFINITY_SUPPORTED
4024 this_thr->th.th_new_place = this_thr->th.th_current_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004025#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004026#endif
4027 this_thr->th.th_root = master->th.th_root;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004028
Jonathan Peyton30419822017-05-12 18:01:32 +00004029 /* setup the thread's cache of the team structure */
4030 this_thr->th.th_team_nproc = team->t.t_nproc;
4031 this_thr->th.th_team_master = master;
4032 this_thr->th.th_team_serialized = team->t.t_serialized;
4033 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004034
Jonathan Peyton30419822017-05-12 18:01:32 +00004035 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004036
Jonathan Peyton30419822017-05-12 18:01:32 +00004037 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4038 tid, gtid, this_thr, this_thr->th.th_current_task));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004039
Jonathan Peyton30419822017-05-12 18:01:32 +00004040 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4041 team, tid, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004042
Jonathan Peyton30419822017-05-12 18:01:32 +00004043 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4044 tid, gtid, this_thr, this_thr->th.th_current_task));
4045 // TODO: Initialize ICVs from parent; GEH - isn't that already done in
4046 // __kmp_initialize_team()?
Jim Cownie5e8470a2013-09-27 10:38:44 +00004047
Jonathan Peyton30419822017-05-12 18:01:32 +00004048 /* TODO no worksharing in speculative threads */
4049 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00004050
Jonathan Peyton30419822017-05-12 18:01:32 +00004051 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004052
4053#ifdef BUILD_TV
Jonathan Peyton30419822017-05-12 18:01:32 +00004054 this_thr->th.th_local.tv_data = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004055#endif
4056
Jonathan Peyton30419822017-05-12 18:01:32 +00004057 if (!this_thr->th.th_pri_common) {
4058 this_thr->th.th_pri_common =
4059 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4060 if (__kmp_storage_map) {
4061 __kmp_print_storage_map_gtid(
4062 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4063 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004064 }; // if
Jonathan Peyton30419822017-05-12 18:01:32 +00004065 this_thr->th.th_pri_head = NULL;
4066 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00004067
Jonathan Peyton30419822017-05-12 18:01:32 +00004068 /* Initialize dynamic dispatch */
4069 {
4070 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4071 // Use team max_nproc since this will never change for the team.
4072 size_t disp_size =
4073 sizeof(dispatch_private_info_t) *
4074 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4075 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4076 team->t.t_max_nproc));
4077 KMP_ASSERT(dispatch);
4078 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4079 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004080
Jonathan Peyton30419822017-05-12 18:01:32 +00004081 dispatch->th_disp_index = 0;
Jonathan Peytondf6818b2016-06-14 17:57:47 +00004082#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004083 dispatch->th_doacross_buf_idx = 0;
Jonathan Peyton71909c52016-03-02 22:42:06 +00004084#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004085 if (!dispatch->th_disp_buffer) {
4086 dispatch->th_disp_buffer =
4087 (dispatch_private_info_t *)__kmp_allocate(disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004088
Jonathan Peyton30419822017-05-12 18:01:32 +00004089 if (__kmp_storage_map) {
4090 __kmp_print_storage_map_gtid(
4091 gtid, &dispatch->th_disp_buffer[0],
4092 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4093 ? 1
4094 : __kmp_dispatch_num_buffers],
4095 disp_size, "th_%d.th_dispatch.th_disp_buffer "
4096 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4097 gtid, team->t.t_id, gtid);
4098 }
4099 } else {
4100 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004101 }
4102
Jonathan Peyton30419822017-05-12 18:01:32 +00004103 dispatch->th_dispatch_pr_current = 0;
4104 dispatch->th_dispatch_sh_current = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004105
Jonathan Peyton30419822017-05-12 18:01:32 +00004106 dispatch->th_deo_fcn = 0; /* ORDERED */
4107 dispatch->th_dxo_fcn = 0; /* END ORDERED */
4108 }
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004109
Jonathan Peyton30419822017-05-12 18:01:32 +00004110 this_thr->th.th_next_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004111
Jonathan Peyton30419822017-05-12 18:01:32 +00004112 if (!this_thr->th.th_task_state_memo_stack) {
4113 size_t i;
4114 this_thr->th.th_task_state_memo_stack =
4115 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4116 this_thr->th.th_task_state_top = 0;
4117 this_thr->th.th_task_state_stack_sz = 4;
4118 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4119 ++i) // zero init the stack
4120 this_thr->th.th_task_state_memo_stack[i] = 0;
4121 }
4122
4123 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4124 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4125
4126 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004127}
4128
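// Hedged restatement of the dispatch-buffer sizing rule above: a
// serialized team (t_max_nproc == 1) needs a single private dispatch
// buffer, while a real team rotates through __kmp_dispatch_num_buffers of
// them so consecutive nowait dynamic loops do not collide. The helper is
// ours, for illustration only.
//
//   static size_t example_disp_buffer_bytes(size_t buf_size,
//                                           int team_max_nproc,
//                                           int num_buffers) {
//     return buf_size * (team_max_nproc == 1 ? 1 : (size_t)num_buffers);
//   }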
Jonathan Peyton30419822017-05-12 18:01:32 +00004129/* allocate a new thread for the requesting team. this is only called from
4130 within a forkjoin critical section. we will first try to get an available
4131 thread from the thread pool. if none is available, we will fork a new one
4132 assuming we are able to create a new one. this should be assured, as the
4133 caller should check on this first. */
4134kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4135 int new_tid) {
4136 kmp_team_t *serial_team;
4137 kmp_info_t *new_thr;
4138 int new_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004139
Jonathan Peyton30419822017-05-12 18:01:32 +00004140 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4141 KMP_DEBUG_ASSERT(root && team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004142#if !KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004143 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004144#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004145 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004146
Jonathan Peyton30419822017-05-12 18:01:32 +00004147 /* first, try to get one from the thread pool */
4148 if (__kmp_thread_pool) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004149
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00004150 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00004151 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4152 if (new_thr == __kmp_thread_pool_insert_pt) {
4153 __kmp_thread_pool_insert_pt = NULL;
4154 }
4155 TCW_4(new_thr->th.th_in_pool, FALSE);
4156 // Don't touch th_active_in_pool or th_active.
4157 // The worker thread adjusts those flags as it sleeps/awakens.
4158 __kmp_thread_pool_nth--;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004159
Jonathan Peyton30419822017-05-12 18:01:32 +00004160 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4161 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4162 KMP_ASSERT(!new_thr->th.th_team);
4163 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4164 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004165
Jonathan Peyton30419822017-05-12 18:01:32 +00004166 /* setup the thread structure */
4167 __kmp_initialize_info(new_thr, team, new_tid,
4168 new_thr->th.th_info.ds.ds_gtid);
4169 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004170
Jonathan Peyton30419822017-05-12 18:01:32 +00004171 TCW_4(__kmp_nth, __kmp_nth + 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004172
Jonathan Peyton30419822017-05-12 18:01:32 +00004173 new_thr->th.th_task_state = 0;
4174 new_thr->th.th_task_state_top = 0;
4175 new_thr->th.th_task_state_stack_sz = 4;
Andrey Churbanov6d224db2015-02-10 18:37:43 +00004176
Jim Cownie5e8470a2013-09-27 10:38:44 +00004177#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00004178 /* Adjust blocktime back to zero if necessary */
4179 /* Middle initialization might not have occurred yet */
4180 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4181 if (__kmp_nth > __kmp_avail_proc) {
4182 __kmp_zero_bt = TRUE;
4183 }
4184 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004185#endif /* KMP_ADJUST_BLOCKTIME */
4186
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004187#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004188 // If thread entered pool via __kmp_free_thread, wait_flag should !=
4189 // KMP_BARRIER_PARENT_FLAG.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004190 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004191 kmp_balign_t *balign = new_thr->th.th_bar;
4192 for (b = 0; b < bs_last_barrier; ++b)
4193 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194#endif
4195
Jonathan Peyton30419822017-05-12 18:01:32 +00004196 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4197 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004198
Jim Cownie5e8470a2013-09-27 10:38:44 +00004199 KMP_MB();
4200 return new_thr;
Jonathan Peyton30419822017-05-12 18:01:32 +00004201 }
4202
4203 /* no, we'll fork a new one */
4204 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4205 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4206
4207#if KMP_USE_MONITOR
4208 // If this is the first worker thread the RTL is creating, then also
4209 // launch the monitor thread. We try to do this as early as possible.
4210 if (!TCR_4(__kmp_init_monitor)) {
4211 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4212 if (!TCR_4(__kmp_init_monitor)) {
4213 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4214 TCW_4(__kmp_init_monitor, 1);
4215 __kmp_create_monitor(&__kmp_monitor);
4216 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4217#if KMP_OS_WINDOWS
4218 // AC: wait until monitor has started. This is a fix for CQ232808.
4219 // The reason is that if the library is loaded/unloaded in a loop with
4220 // small (parallel) work in between, then there is high probability that
4221 // monitor thread started after the library shutdown. At shutdown it is
4222 // too late to cope with the problem, because when the master is in
4223 // DllMain (process detach) the monitor has no chances to start (it is
4224 // blocked), and master has no means to inform the monitor that the
4225 // library has gone, because all the memory which the monitor can access
4226 // is going to be released/reset.
4227 while (TCR_4(__kmp_init_monitor) < 2) {
4228 KMP_YIELD(TRUE);
4229 }
4230 KF_TRACE(10, ("after monitor thread has started\n"));
4231#endif
4232 }
4233 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4234 }
4235#endif
4236
4237 KMP_MB();
4238 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4239 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4240 }
4241
4242 /* allocate space for it. */
4243 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4244
4245 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4246
4247 if (__kmp_storage_map) {
4248 __kmp_print_thread_storage_map(new_thr, new_gtid);
4249 }
4250
4251 // add the reserve serialized team, initialized from the team's master thread
4252 {
4253 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4254 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4255 new_thr->th.th_serial_team = serial_team =
4256 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4257#if OMPT_SUPPORT
4258 0, // root parallel id
4259#endif
4260#if OMP_40_ENABLED
4261 proc_bind_default,
4262#endif
4263 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4264 }
4265 KMP_ASSERT(serial_team);
4266 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
4267 // execution (it is unused for now).
4268 serial_team->t.t_threads[0] = new_thr;
4269 KF_TRACE(10,
4270 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4271 new_thr));
4272
4273 /* setup the thread structures */
4274 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4275
4276#if USE_FAST_MEMORY
4277 __kmp_initialize_fast_memory(new_thr);
4278#endif /* USE_FAST_MEMORY */
4279
4280#if KMP_USE_BGET
4281 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4282 __kmp_initialize_bget(new_thr);
4283#endif
4284
4285 __kmp_init_random(new_thr); // Initialize random number generator
4286
4287 /* Initialize these only once when thread is grabbed for a team allocation */
4288 KA_TRACE(20,
4289 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4290 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4291
4292 int b;
4293 kmp_balign_t *balign = new_thr->th.th_bar;
4294 for (b = 0; b < bs_last_barrier; ++b) {
4295 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4296 balign[b].bb.team = NULL;
4297 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4298 balign[b].bb.use_oncore_barrier = 0;
4299 }
4300
4301 new_thr->th.th_spin_here = FALSE;
4302 new_thr->th.th_next_waiting = 0;
4303
4304#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4305 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4306 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4307 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4308 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4309#endif
4310
4311 TCW_4(new_thr->th.th_in_pool, FALSE);
4312 new_thr->th.th_active_in_pool = FALSE;
4313 TCW_4(new_thr->th.th_active, TRUE);
4314
4315 /* adjust the global counters */
4316 __kmp_all_nth++;
4317 __kmp_nth++;
4318
4319 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
4320 // numbers of procs, and method #2 (keyed API call) for higher numbers.
4321 if (__kmp_adjust_gtid_mode) {
4322 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4323 if (TCR_4(__kmp_gtid_mode) != 2) {
4324 TCW_4(__kmp_gtid_mode, 2);
4325 }
4326 } else {
4327 if (TCR_4(__kmp_gtid_mode) != 1) {
4328 TCW_4(__kmp_gtid_mode, 1);
4329 }
4330 }
4331 }
4332
4333#ifdef KMP_ADJUST_BLOCKTIME
4334 /* Adjust blocktime back to zero if necessary */
4335 /* Middle initialization might not have occurred yet */
4336 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4337 if (__kmp_nth > __kmp_avail_proc) {
4338 __kmp_zero_bt = TRUE;
4339 }
4340 }
4341#endif /* KMP_ADJUST_BLOCKTIME */
4342
4343 /* actually fork it and create the new worker thread */
4344 KF_TRACE(
4345 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4346 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4347 KF_TRACE(10,
4348 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4349
4350 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4351 new_gtid));
4352 KMP_MB();
4353 return new_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004354}
4355
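// A simplified sketch (illustrative names and types, not the runtime's
// real structures; assumes <stdlib.h>) of the allocation policy above:
// reuse a pooled worker if the LIFO pool is non-empty, otherwise fork a
// brand-new one.
//
//   typedef struct example_worker {
//     struct example_worker *next_pool;
//   } example_worker_t;
//   static example_worker_t *example_pool = NULL;
//   static example_worker_t *example_allocate_worker(void) {
//     if (example_pool) { // fast path: pop the head of the pool
//       example_worker_t *w = example_pool;
//       example_pool = w->next_pool;
//       w->next_pool = NULL;
//       return w;
//     }
//     // slow path: no pooled worker available; create a fresh one
//     return (example_worker_t *)calloc(1, sizeof(example_worker_t));
//   }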
Jonathan Peyton30419822017-05-12 18:01:32 +00004356/* Reinitialize team for reuse.
4357 The hot team code calls this routine at every fork barrier, so EPCC barrier
4358 tests are extremely sensitive to changes in it, esp. writes to the team
4359 struct, which cause a cache invalidation in all threads.
4360 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
4361static void __kmp_reinitialize_team(kmp_team_t *team,
4362 kmp_internal_control_t *new_icvs,
4363 ident_t *loc) {
4364 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4365 team->t.t_threads[0], team));
4366 KMP_DEBUG_ASSERT(team && new_icvs);
4367 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4368 KMP_CHECK_UPDATE(team->t.t_ident, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004369
Jonathan Peyton30419822017-05-12 18:01:32 +00004370 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
Jonathan Peyton30419822017-05-12 18:01:32 +00004371 // Copy ICVs to the master thread's implicit taskdata
4372 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4373 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
Jim Cownie181b4bb2013-12-23 17:28:57 +00004374
Jonathan Peyton30419822017-05-12 18:01:32 +00004375 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4376 team->t.t_threads[0], team));
Jim Cownie181b4bb2013-12-23 17:28:57 +00004377}
4378
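// Why KMP_CHECK_UPDATE instead of a plain assignment in the routine
// above: this path runs at every fork barrier, and an unconditional store
// would dirty the team struct's cache line in all spinning workers even
// when the value did not change. A hedged sketch of the pattern (the real
// macro lives in kmp.h; the name below is ours):
//
//   #define EXAMPLE_CHECK_UPDATE(lhs, rhs)                                \
//     do {                                                                \
//       if ((lhs) != (rhs))                                               \
//         (lhs) = (rhs); /* write only on change; avoids invalidation */  \
//     } while (0)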
Jonathan Peyton30419822017-05-12 18:01:32 +00004379/* Initialize the team data structure.
4380 This assumes the t_threads and t_max_nproc are already set.
4381 Also, we don't touch the arguments */
4382static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4383 kmp_internal_control_t *new_icvs,
4384 ident_t *loc) {
4385 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004386
Jonathan Peyton30419822017-05-12 18:01:32 +00004387 /* verify */
4388 KMP_DEBUG_ASSERT(team);
4389 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4390 KMP_DEBUG_ASSERT(team->t.t_threads);
4391 KMP_MB();
Jim Cownie181b4bb2013-12-23 17:28:57 +00004392
Jonathan Peyton30419822017-05-12 18:01:32 +00004393 team->t.t_master_tid = 0; /* not needed */
4394 /* team->t.t_master_bar; not needed */
4395 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4396 team->t.t_nproc = new_nproc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004397
Jonathan Peyton30419822017-05-12 18:01:32 +00004398 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
4399 team->t.t_next_pool = NULL;
4400 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
4401 * up hot team */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004402
Jonathan Peyton30419822017-05-12 18:01:32 +00004403 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
4404 team->t.t_invoke = NULL; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004405
Jonathan Peyton30419822017-05-12 18:01:32 +00004406 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4407 team->t.t_sched = new_icvs->sched;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004408
4409#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00004410 team->t.t_fp_control_saved = FALSE; /* not needed */
4411 team->t.t_x87_fpu_control_word = 0; /* not needed */
4412 team->t.t_mxcsr = 0; /* not needed */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004413#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4414
Jonathan Peyton30419822017-05-12 18:01:32 +00004415 team->t.t_construct = 0;
4416 __kmp_init_lock(&team->t.t_single_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004417
Jonathan Peyton30419822017-05-12 18:01:32 +00004418 team->t.t_ordered.dt.t_value = 0;
4419 team->t.t_master_active = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004420
Jonathan Peyton30419822017-05-12 18:01:32 +00004421 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004422
4423#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004424 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004425#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004426 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004427
Jonathan Peyton30419822017-05-12 18:01:32 +00004428 team->t.t_control_stack_top = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004429
Jonathan Peyton30419822017-05-12 18:01:32 +00004430 __kmp_reinitialize_team(team, new_icvs, loc);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004431
Jonathan Peyton30419822017-05-12 18:01:32 +00004432 KMP_MB();
4433 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004434}
4435
Alp Toker98758b02014-03-02 04:12:06 +00004436#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004437/* Sets full mask for thread and returns old mask, no changes to structures. */
4438static void
Jonathan Peyton30419822017-05-12 18:01:32 +00004439__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4440 if (KMP_AFFINITY_CAPABLE()) {
4441 int status;
4442 if (old_mask != NULL) {
4443 status = __kmp_get_system_affinity(old_mask, TRUE);
4444 int error = errno;
4445 if (status != 0) {
4446 __kmp_msg(kmp_ms_fatal, KMP_MSG(ChangeThreadAffMaskError),
4447 KMP_ERR(error), __kmp_msg_null);
4448 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004449 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004450 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4451 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004452}
4453#endif
4454
Alp Toker98758b02014-03-02 04:12:06 +00004455#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00004456
Jim Cownie5e8470a2013-09-27 10:38:44 +00004457// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
4458// It calculats the worker + master thread's partition based upon the parent
Alp Toker8f2d3f02014-02-24 10:40:15 +00004459// thread's partition, and binds each worker to a thread in their partition.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004460// The master thread's partition should already include its current binding.
Jonathan Peyton30419822017-05-12 18:01:32 +00004461static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4462 // Copy the master thread's place partition to the team struct
4463 kmp_info_t *master_th = team->t.t_threads[0];
4464 KMP_DEBUG_ASSERT(master_th != NULL);
4465 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4466 int first_place = master_th->th.th_first_place;
4467 int last_place = master_th->th.th_last_place;
4468 int masters_place = master_th->th.th_current_place;
4469 team->t.t_first_place = first_place;
4470 team->t.t_last_place = last_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004471
Jonathan Peyton30419822017-05-12 18:01:32 +00004472 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4473 "bound to place %d partition = [%d,%d]\n",
4474 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4475 team->t.t_id, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004476
Jonathan Peyton30419822017-05-12 18:01:32 +00004477 switch (proc_bind) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00004478
Jonathan Peyton30419822017-05-12 18:01:32 +00004479 case proc_bind_default:
4480 // serial teams might have the proc_bind policy set to proc_bind_default. It
4481 // doesn't matter, as we don't rebind the master thread for any proc_bind policy
4482 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4483 break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004484
Jonathan Peyton30419822017-05-12 18:01:32 +00004485 case proc_bind_master: {
4486 int f;
4487 int n_th = team->t.t_nproc;
4488 for (f = 1; f < n_th; f++) {
4489 kmp_info_t *th = team->t.t_threads[f];
4490 KMP_DEBUG_ASSERT(th != NULL);
4491 th->th.th_first_place = first_place;
4492 th->th.th_last_place = last_place;
4493 th->th.th_new_place = masters_place;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004494
Jonathan Peyton30419822017-05-12 18:01:32 +00004495 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
4496 "partition = [%d,%d]\n",
4497 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4498 f, masters_place, first_place, last_place));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004499 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004500 } break;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004501
Jonathan Peyton30419822017-05-12 18:01:32 +00004502 case proc_bind_close: {
4503 int f;
4504 int n_th = team->t.t_nproc;
4505 int n_places;
4506 if (first_place <= last_place) {
4507 n_places = last_place - first_place + 1;
4508 } else {
4509 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4510 }
4511 if (n_th <= n_places) {
4512 int place = masters_place;
4513 for (f = 1; f < n_th; f++) {
4514 kmp_info_t *th = team->t.t_threads[f];
4515 KMP_DEBUG_ASSERT(th != NULL);
4516
4517 if (place == last_place) {
4518 place = first_place;
4519 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4520 place = 0;
4521 } else {
4522 place++;
4523 }
4524 th->th.th_first_place = first_place;
4525 th->th.th_last_place = last_place;
4526 th->th.th_new_place = place;
4527
4528 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4529 "partition = [%d,%d]\n",
4530 __kmp_gtid_from_thread(team->t.t_threads[f]),
4531 team->t.t_id, f, place, first_place, last_place));
4532 }
4533 } else {
4534 int S, rem, gap, s_count;
4535 S = n_th / n_places;
4536 s_count = 0;
4537 rem = n_th - (S * n_places);
4538 gap = rem > 0 ? n_places / rem : n_places;
4539 int place = masters_place;
4540 int gap_ct = gap;
4541 for (f = 0; f < n_th; f++) {
4542 kmp_info_t *th = team->t.t_threads[f];
4543 KMP_DEBUG_ASSERT(th != NULL);
4544
4545 th->th.th_first_place = first_place;
4546 th->th.th_last_place = last_place;
4547 th->th.th_new_place = place;
4548 s_count++;
4549
4550 if ((s_count == S) && rem && (gap_ct == gap)) {
4551 // do nothing, add an extra thread to place on next iteration
4552 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4553 // we added an extra thread to this place; move to next place
4554 if (place == last_place) {
4555 place = first_place;
4556 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4557 place = 0;
4558 } else {
4559 place++;
4560 }
4561 s_count = 0;
4562 gap_ct = 1;
4563 rem--;
4564 } else if (s_count == S) { // place full; don't add extra
4565 if (place == last_place) {
4566 place = first_place;
4567 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4568 place = 0;
4569 } else {
4570 place++;
4571 }
4572 gap_ct++;
4573 s_count = 0;
4574 }
4575
4576 KA_TRACE(100,
4577 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4578 "partition = [%d,%d]\n",
4579 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4580 th->th.th_new_place, first_place, last_place));
4581 }
4582 KMP_DEBUG_ASSERT(place == masters_place);
4583 }
4584 } break;
4585
4586 case proc_bind_spread: {
4587 int f;
4588 int n_th = team->t.t_nproc;
4589 int n_places;
4590 int thidx;
4591 if (first_place <= last_place) {
4592 n_places = last_place - first_place + 1;
4593 } else {
4594 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4595 }
4596 if (n_th <= n_places) {
4597 int place = masters_place;
4598 int S = n_places / n_th;
4599 int s_count, rem, gap, gap_ct;
4600 rem = n_places - n_th * S;
4601 gap = rem ? n_th / rem : 1;
4602 gap_ct = gap;
4603 thidx = n_th;
4604 if (update_master_only == 1)
4605 thidx = 1;
4606 for (f = 0; f < thidx; f++) {
4607 kmp_info_t *th = team->t.t_threads[f];
4608 KMP_DEBUG_ASSERT(th != NULL);
4609
4610 th->th.th_first_place = place;
4611 th->th.th_new_place = place;
4612 s_count = 1;
4613 while (s_count < S) {
4614 if (place == last_place) {
4615 place = first_place;
4616 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4617 place = 0;
4618 } else {
4619 place++;
4620 }
4621 s_count++;
4622 }
4623 if (rem && (gap_ct == gap)) {
4624 if (place == last_place) {
4625 place = first_place;
4626 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4627 place = 0;
4628 } else {
4629 place++;
4630 }
4631 rem--;
4632 gap_ct = 0;
4633 }
4634 th->th.th_last_place = place;
4635 gap_ct++;
4636
4637 if (place == last_place) {
4638 place = first_place;
4639 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4640 place = 0;
4641 } else {
4642 place++;
4643 }
4644
4645 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4646 "partition = [%d,%d]\n",
4647 __kmp_gtid_from_thread(team->t.t_threads[f]),
4648 team->t.t_id, f, th->th.th_new_place,
4649 th->th.th_first_place, th->th.th_last_place));
4650 }
4651 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4652 } else {
4653 int S, rem, gap, s_count;
4654 S = n_th / n_places;
4655 s_count = 0;
4656 rem = n_th - (S * n_places);
4657 gap = rem > 0 ? n_places / rem : n_places;
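      // Oversubscribed spread: threads are packed onto places exactly as in
      // the proc_bind_close surplus branch above (S per place, one extra at
      // every gap-th place), but each thread's partition is its single place.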
4658 int place = masters_place;
4659 int gap_ct = gap;
4660 thidx = n_th;
4661 if (update_master_only == 1)
4662 thidx = 1;
4663 for (f = 0; f < thidx; f++) {
4664 kmp_info_t *th = team->t.t_threads[f];
4665 KMP_DEBUG_ASSERT(th != NULL);
4666
4667 th->th.th_first_place = place;
4668 th->th.th_last_place = place;
4669 th->th.th_new_place = place;
4670 s_count++;
4671
4672 if ((s_count == S) && rem && (gap_ct == gap)) {
4673 // do nothing now; an extra thread will be added to this place on the next iteration
4674 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4675 // we added an extra thread to this place; move on to next place
4676 if (place == last_place) {
4677 place = first_place;
4678 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4679 place = 0;
4680 } else {
4681 place++;
4682 }
4683 s_count = 0;
4684 gap_ct = 1;
4685 rem--;
4686 } else if (s_count == S) { // place is full; don't add extra thread
4687 if (place == last_place) {
4688 place = first_place;
4689 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4690 place = 0;
4691 } else {
4692 place++;
4693 }
4694 gap_ct++;
4695 s_count = 0;
4696 }
4697
4698 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4699 "partition = [%d,%d]\n",
4700 __kmp_gtid_from_thread(team->t.t_threads[f]),
4701 team->t.t_id, f, th->th.th_new_place,
4702 th->th.th_first_place, th->th.th_last_place));
4703 }
4704 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4705 }
4706 } break;
4707
4708 default:
4709 break;
4710 }
4711
4712 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004713}
4714
Alp Toker98758b02014-03-02 04:12:06 +00004715#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004716
Jonathan Peyton30419822017-05-12 18:01:32 +00004717/* allocate a new team data structure to use. take one off of the free pool if
4718 available */
Jim Cownie5e8470a2013-09-27 10:38:44 +00004719kmp_team_t *
Jonathan Peyton30419822017-05-12 18:01:32 +00004720__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004721#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00004722 ompt_parallel_id_t ompt_parallel_id,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00004723#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004724#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004725 kmp_proc_bind_t new_proc_bind,
Jim Cownie5e8470a2013-09-27 10:38:44 +00004726#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004727 kmp_internal_control_t *new_icvs,
4728 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4729 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4730 int f;
4731 kmp_team_t *team;
4732 int use_hot_team = !root->r.r_active;
4733 int level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004734
Jonathan Peyton30419822017-05-12 18:01:32 +00004735 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4736 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4737 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4738 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004739
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004740#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004741 kmp_hot_team_ptr_t *hot_teams;
4742 if (master) {
4743 team = master->th.th_team;
4744 level = team->t.t_active_level;
4745 if (master->th.th_teams_microtask) { // in teams construct?
4746 if (master->th.th_teams_size.nteams > 1 &&
4747 ( // #teams > 1
4748 team->t.t_pkfn ==
4749 (microtask_t)__kmp_teams_master || // inner fork of the teams
4750 master->th.th_teams_level <
4751 team->t.t_level)) { // or nested parallel inside the teams
4752 ++level; // do not increment if #teams==1 or for the outer fork of the
4753 // teams; increment otherwise
4754 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004755 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004756 hot_teams = master->th.th_hot_teams;
4757 if (level < __kmp_hot_teams_max_level && hot_teams &&
4758 hot_teams[level]
4759 .hot_team) { // a hot team has already been allocated for this level
4760 use_hot_team = 1;
4761 } else {
4762 use_hot_team = 0;
4763 }
4764 }
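  // With nested hot teams, each master caches one hot team per nesting level
  // in th_hot_teams[]; a level below __kmp_hot_teams_max_level reuses the
  // cached team instead of allocating a new one.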
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004765#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004766 // Optimization to use a "hot" team
4767 if (use_hot_team && new_nproc > 1) {
4768 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004769#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004770 team = hot_teams[level].hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004771#else
Jonathan Peyton30419822017-05-12 18:01:32 +00004772 team = root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004773#endif
4774#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004775 if (__kmp_tasking_mode != tskm_immediate_exec) {
4776 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
4777 "task_team[1] = %p before reinit\n",
4778 team->t.t_task_team[0], team->t.t_task_team[1]));
4779 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004780#endif
4781
Jonathan Peyton30419822017-05-12 18:01:32 +00004782 // Has the number of threads changed?
4783 /* Let's assume the most common case is that the number of threads is
4784 unchanged, and put that case first. */
4785 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
4786 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
4787 // This case can mean that omp_set_num_threads() was called and the hot
Jonathan Peyton642688b2017-06-01 16:46:36 +00004788 // team size was already reduced, so we check the special flag
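      // (t_size_changed == -1 is the sentinel stored when the hot team was
      // shrunk by omp_set_num_threads(); treat the team as resized even
      // though the thread count matches again.)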
Jonathan Peyton30419822017-05-12 18:01:32 +00004789 if (team->t.t_size_changed == -1) {
4790 team->t.t_size_changed = 1;
4791 } else {
4792 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4793 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004794
Jonathan Peyton30419822017-05-12 18:01:32 +00004795 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4796 kmp_r_sched_t new_sched = new_icvs->sched;
4797 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4798 team->t.t_sched.chunk != new_sched.chunk)
4799 team->t.t_sched =
4800 new_sched; // set master's schedule as new run-time schedule
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004801
Jonathan Peyton30419822017-05-12 18:01:32 +00004802 __kmp_reinitialize_team(team, new_icvs,
4803 root->r.r_uber_thread->th.th_ident);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004804
Jonathan Peyton30419822017-05-12 18:01:32 +00004805 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4806 team->t.t_threads[0], team));
4807 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004808
4809#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004810#if KMP_AFFINITY_SUPPORTED
4811 if ((team->t.t_size_changed == 0) &&
4812 (team->t.t_proc_bind == new_proc_bind)) {
4813 if (new_proc_bind == proc_bind_spread) {
4814 __kmp_partition_places(
4815 team, 1); // add flag to update only master for spread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004816 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004817 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
4818 "proc_bind = %d, partition = [%d,%d]\n",
4819 team->t.t_id, new_proc_bind, team->t.t_first_place,
4820 team->t.t_last_place));
4821 } else {
4822 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4823 __kmp_partition_places(team);
4824 }
4825#else
4826 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4827#endif /* KMP_AFFINITY_SUPPORTED */
4828#endif /* OMP_40_ENABLED */
4829 } else if (team->t.t_nproc > new_nproc) {
4830 KA_TRACE(20,
4831 ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
4832 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004833
Jonathan Peyton30419822017-05-12 18:01:32 +00004834 team->t.t_size_changed = 1;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004835#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004836 if (__kmp_hot_teams_mode == 0) {
4837 // AC: the saved thread count should match the team's value in this mode;
4838 // it can be bigger in mode 1, when the hot team keeps threads in reserve
4839 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4840 hot_teams[level].hot_team_nth = new_nproc;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004841#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004842 /* release the extra threads we don't need any more */
4843 for (f = new_nproc; f < team->t.t_nproc; f++) {
4844 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4845 if (__kmp_tasking_mode != tskm_immediate_exec) {
4846 // When decreasing team size, threads no longer in the team should
4847 // unref task team.
4848 team->t.t_threads[f]->th.th_task_team = NULL;
4849 }
4850 __kmp_free_thread(team->t.t_threads[f]);
4851 team->t.t_threads[f] = NULL;
4852 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004853#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004854 } // (__kmp_hot_teams_mode == 0)
4855 else {
4856 // When keeping extra threads in the team, switch them to wait on their
4857 // own b_go flag
4858 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4859 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4860 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4861 for (int b = 0; b < bs_last_barrier; ++b) {
4862 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4863 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
Andrey Churbanovd6e1d7e2016-08-11 13:04:00 +00004864 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004865 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4866 }
4867 }
4868 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004869#endif // KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004870 team->t.t_nproc = new_nproc;
4871 // TODO???: team->t.t_max_active_levels = new_max_active_levels;
4872 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4873 team->t.t_sched.chunk != new_icvs->sched.chunk)
4874 team->t.t_sched = new_icvs->sched;
4875 __kmp_reinitialize_team(team, new_icvs,
4876 root->r.r_uber_thread->th.th_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004877
Jonathan Peyton30419822017-05-12 18:01:32 +00004878 /* update the remaining threads */
4879 for (f = 0; f < new_nproc; ++f) {
4880 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4881 }
4882 // restore the current task state of the master thread: should be the
4883 // implicit task
4884 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4885 team->t.t_threads[0], team));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004886
Jonathan Peyton30419822017-05-12 18:01:32 +00004887 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004888
4889#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00004890 for (f = 0; f < team->t.t_nproc; f++) {
4891 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4892 team->t.t_threads[f]->th.th_team_nproc ==
4893 team->t.t_nproc);
4894 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004895#endif
4896
4897#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00004898 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4899#if KMP_AFFINITY_SUPPORTED
4900 __kmp_partition_places(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00004901#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00004902#endif
4903 } else { // team->t.t_nproc < new_nproc
Alp Toker98758b02014-03-02 04:12:06 +00004904#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00004905 kmp_affin_mask_t *old_mask;
4906 if (KMP_AFFINITY_CAPABLE()) {
4907 KMP_CPU_ALLOC(old_mask);
4908 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00004909#endif
4910
Jonathan Peyton30419822017-05-12 18:01:32 +00004911 KA_TRACE(20,
4912 ("__kmp_allocate_team: increasing hot team thread count to %d\n",
4913 new_nproc));
Jim Cownie5e8470a2013-09-27 10:38:44 +00004914
Jonathan Peyton30419822017-05-12 18:01:32 +00004915 team->t.t_size_changed = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00004916
Jim Cownie4cc4bb42014-10-07 16:25:50 +00004917#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00004918 int avail_threads = hot_teams[level].hot_team_nth;
4919 if (new_nproc < avail_threads)
4920 avail_threads = new_nproc;
4921 kmp_info_t **other_threads = team->t.t_threads;
4922 for (f = team->t.t_nproc; f < avail_threads; ++f) {
4923 // Adjust barrier data of reserved threads (if any) of the team
4924 // Other data will be set in __kmp_initialize_info() below.
Jim Cownie5e8470a2013-09-27 10:38:44 +00004925 int b;
Jonathan Peyton30419822017-05-12 18:01:32 +00004926 kmp_balign_t *balign = other_threads[f]->th.th_bar;
4927 for (b = 0; b < bs_last_barrier; ++b) {
4928 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4929 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004930#if USE_DEBUGGER
Jonathan Peyton30419822017-05-12 18:01:32 +00004931 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00004932#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00004933 }
Jonathan Peyton30419822017-05-12 18:01:32 +00004934 }
4935 if (hot_teams[level].hot_team_nth >= new_nproc) {
4936 // we have all needed threads in reserve, no need to allocate any
4937 // this is only possible in mode 1; mode 0 cannot have reserved threads
4938 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4939 team->t.t_nproc = new_nproc; // just get reserved threads involved
4940 } else {
4941 // we may have some threads in reserve, but not enough
4942 team->t.t_nproc =
4943 hot_teams[level]
4944 .hot_team_nth; // get reserved threads involved if any
4945 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
4946#endif // KMP_NESTED_HOT_TEAMS
4947 if (team->t.t_max_nproc < new_nproc) {
4948 /* reallocate larger arrays */
4949 __kmp_reallocate_team_arrays(team, new_nproc);
4950 __kmp_reinitialize_team(team, new_icvs, NULL);
4951 }
4952
4953#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4954 /* Temporarily set full mask for master thread before creation of
4955 workers. The reason is that workers inherit the affinity from the
4956 master, so if many workers are created quickly on a single core, they
4957 don't get a chance to set their own affinity for a long time. */
4958 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
4959#endif
4960
4961 /* allocate new threads for the hot team */
4962 for (f = team->t.t_nproc; f < new_nproc; f++) {
4963 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
4964 KMP_DEBUG_ASSERT(new_worker);
4965 team->t.t_threads[f] = new_worker;
4966
4967 KA_TRACE(20,
4968 ("__kmp_allocate_team: team %d init T#%d arrived: "
4969 "join=%llu, plain=%llu\n",
4970 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
4971 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4972 team->t.t_bar[bs_plain_barrier].b_arrived));
4973
4974 { // Initialize barrier data for new threads.
4975 int b;
4976 kmp_balign_t *balign = new_worker->th.th_bar;
4977 for (b = 0; b < bs_last_barrier; ++b) {
4978 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4979 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
4980 KMP_BARRIER_PARENT_FLAG);
4981#if USE_DEBUGGER
4982 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4983#endif
4984 }
4985 }
4986 }
4987
4988#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4989 if (KMP_AFFINITY_CAPABLE()) {
4990 /* Restore initial master thread's affinity mask */
4991 __kmp_set_system_affinity(old_mask, TRUE);
4992 KMP_CPU_FREE(old_mask);
4993 }
4994#endif
4995#if KMP_NESTED_HOT_TEAMS
4996 } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
4997#endif // KMP_NESTED_HOT_TEAMS
4998 /* make sure everyone is synchronized */
4999 int old_nproc = team->t.t_nproc; // save old value and use to update only
5000 // new threads below
5001 __kmp_initialize_team(team, new_nproc, new_icvs,
5002 root->r.r_uber_thread->th.th_ident);
5003
5004 /* reinitialize the threads */
5005 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5006 for (f = 0; f < team->t.t_nproc; ++f)
5007 __kmp_initialize_info(team->t.t_threads[f], team, f,
5008 __kmp_gtid_from_tid(f, team));
5009 if (level) { // set th_task_state for new threads in nested hot team
5010 // __kmp_initialize_info() no longer zeroes th_task_state, so we should
5011 // only need to set the th_task_state for the new threads. th_task_state
5012 // for master thread will not be accurate until after this in
5013 // __kmp_fork_call(), so we look to the master's memo_stack to get the
5014 // correct value.
5015 for (f = old_nproc; f < team->t.t_nproc; ++f)
5016 team->t.t_threads[f]->th.th_task_state =
5017 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5018 } else { // set th_task_state for new threads in non-nested hot team
5019 int old_state =
5020 team->t.t_threads[0]->th.th_task_state; // copy master's state
5021 for (f = old_nproc; f < team->t.t_nproc; ++f)
5022 team->t.t_threads[f]->th.th_task_state = old_state;
5023 }
5024
5025#ifdef KMP_DEBUG
5026 for (f = 0; f < team->t.t_nproc; ++f) {
5027 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5028 team->t.t_threads[f]->th.th_team_nproc ==
5029 team->t.t_nproc);
5030 }
5031#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005032
5033#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00005034 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5035#if KMP_AFFINITY_SUPPORTED
5036 __kmp_partition_places(team);
5037#endif
5038#endif
5039 } // Check changes in number of threads
5040
5041#if OMP_40_ENABLED
5042 kmp_info_t *master = team->t.t_threads[0];
5043 if (master->th.th_teams_microtask) {
5044 for (f = 1; f < new_nproc; ++f) {
5045 // propagate teams construct specific info to workers
5046 kmp_info_t *thr = team->t.t_threads[f];
5047 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5048 thr->th.th_teams_level = master->th.th_teams_level;
5049 thr->th.th_teams_size = master->th.th_teams_size;
5050 }
5051 }
5052#endif /* OMP_40_ENABLED */
5053#if KMP_NESTED_HOT_TEAMS
5054 if (level) {
5055 // Sync barrier state for nested hot teams, not needed for outermost hot
5056 // team.
5057 for (f = 1; f < new_nproc; ++f) {
5058 kmp_info_t *thr = team->t.t_threads[f];
5059 int b;
5060 kmp_balign_t *balign = thr->th.th_bar;
5061 for (b = 0; b < bs_last_barrier; ++b) {
5062 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5063 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5064#if USE_DEBUGGER
5065 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5066#endif
5067 }
5068 }
5069 }
5070#endif // KMP_NESTED_HOT_TEAMS
5071
5072 /* reallocate space for arguments if necessary */
5073 __kmp_alloc_argv_entries(argc, team, TRUE);
5074 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5075 // The hot team re-uses the previous task team,
5076 // if untouched during the previous release->gather phase.
5077
5078 KF_TRACE(10, (" hot_team = %p\n", team));
5079
5080#if KMP_DEBUG
5081 if (__kmp_tasking_mode != tskm_immediate_exec) {
5082 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5083 "task_team[1] = %p after reinit\n",
5084 team->t.t_task_team[0], team->t.t_task_team[1]));
5085 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005086#endif
5087
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005088#if OMPT_SUPPORT
5089 __ompt_team_assign_id(team, ompt_parallel_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005090#endif
5091
Jim Cownie5e8470a2013-09-27 10:38:44 +00005092 KMP_MB();
5093
Jim Cownie5e8470a2013-09-27 10:38:44 +00005094 return team;
Jonathan Peyton30419822017-05-12 18:01:32 +00005095 }
5096
5097 /* next, let's try to take one from the team pool */
5098 KMP_MB();
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005099 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005100 /* TODO: consider resizing undersized teams instead of reaping them, now
5101 that we have a resizing mechanism */
5102 if (team->t.t_max_nproc >= max_nproc) {
5103 /* take this team from the team pool */
5104 __kmp_team_pool = team->t.t_next_pool;
5105
5106 /* setup the team for fresh use */
5107 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5108
5109 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5110 "task_team[1] %p to NULL\n",
5111 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5112 team->t.t_task_team[0] = NULL;
5113 team->t.t_task_team[1] = NULL;
5114
5115 /* reallocate space for arguments if necessary */
5116 __kmp_alloc_argv_entries(argc, team, TRUE);
5117 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5118
5119 KA_TRACE(
5120 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5121 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5122 { // Initialize barrier data.
5123 int b;
5124 for (b = 0; b < bs_last_barrier; ++b) {
5125 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5126#if USE_DEBUGGER
5127 team->t.t_bar[b].b_master_arrived = 0;
5128 team->t.t_bar[b].b_team_arrived = 0;
5129#endif
5130 }
5131 }
5132
5133#if OMP_40_ENABLED
5134 team->t.t_proc_bind = new_proc_bind;
5135#endif
5136
5137 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5138 team->t.t_id));
5139
5140#if OMPT_SUPPORT
5141 __ompt_team_assign_id(team, ompt_parallel_id);
5142#endif
5143
5144 KMP_MB();
5145
5146 return team;
5147 }
5148
5149/* reap the team if it is too small, then loop back and check the next one */
5150// not sure if this is wise, but it will be redone during the hot-teams rewrite.
5151/* TODO: use a technique to find the right-size hot team; don't reap them */
5152 team = __kmp_reap_team(team);
5153 __kmp_team_pool = team;
5154 }
5155
5156 /* nothing available in the pool, no matter, make a new team! */
5157 KMP_MB();
5158 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5159
5160 /* and set it up */
5161 team->t.t_max_nproc = max_nproc;
5162 /* NOTE well: for some reason, allocating one big buffer and dividing it up
5163 seems to really hurt performance on the P4, so let's not use this */
5164 __kmp_allocate_team_arrays(team, max_nproc);
5165
5166 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5167 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5168
5169 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5170 "%p to NULL\n",
5171 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5172 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
5173 // memory, no need to duplicate
5174 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
5175 // memory, no need to duplicate
5176
5177 if (__kmp_storage_map) {
5178 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5179 }
5180
5181 /* allocate space for arguments */
5182 __kmp_alloc_argv_entries(argc, team, FALSE);
5183 team->t.t_argc = argc;
5184
5185 KA_TRACE(20,
5186 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5187 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5188 { // Initialize barrier data.
5189 int b;
5190 for (b = 0; b < bs_last_barrier; ++b) {
5191 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5192#if USE_DEBUGGER
5193 team->t.t_bar[b].b_master_arrived = 0;
5194 team->t.t_bar[b].b_team_arrived = 0;
5195#endif
5196 }
5197 }
5198
5199#if OMP_40_ENABLED
5200 team->t.t_proc_bind = new_proc_bind;
5201#endif
5202
5203#if OMPT_SUPPORT
5204 __ompt_team_assign_id(team, ompt_parallel_id);
5205 team->t.ompt_serialized_team_info = NULL;
5206#endif
5207
5208 KMP_MB();
5209
5210 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5211 team->t.t_id));
5212
5213 return team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005214}
5215
5216/* TODO implement hot-teams at all levels */
5217/* TODO implement lazy thread release on demand (disband request) */
5218
5219/* free the team. return it to the team pool. release all the threads
5220 * associated with it */
Jonathan Peyton30419822017-05-12 18:01:32 +00005221void __kmp_free_team(kmp_root_t *root,
5222 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5223 int f;
5224 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5225 team->t.t_id));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005226
Jonathan Peyton30419822017-05-12 18:01:32 +00005227 /* verify state */
5228 KMP_DEBUG_ASSERT(root);
5229 KMP_DEBUG_ASSERT(team);
5230 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5231 KMP_DEBUG_ASSERT(team->t.t_threads);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005232
Jonathan Peyton30419822017-05-12 18:01:32 +00005233 int use_hot_team = team == root->r.r_hot_team;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005234#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005235 int level;
5236 kmp_hot_team_ptr_t *hot_teams;
5237 if (master) {
5238 level = team->t.t_active_level - 1;
5239 if (master->th.th_teams_microtask) { // in teams construct?
5240 if (master->th.th_teams_size.nteams > 1) {
5241 ++level; // level was not increased in teams construct for
5242 // team_of_masters
5243 }
5244 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5245 master->th.th_teams_level == team->t.t_level) {
5246 ++level; // level was not increased in teams construct for
5247 // team_of_workers before the parallel
5248 } // team->t.t_level will be increased inside parallel
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005249 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005250 hot_teams = master->th.th_hot_teams;
5251 if (level < __kmp_hot_teams_max_level) {
5252 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5253 use_hot_team = 1;
5254 }
5255 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005256#endif // KMP_NESTED_HOT_TEAMS
5257
Jonathan Peyton30419822017-05-12 18:01:32 +00005258 /* team is done working */
5259 TCW_SYNC_PTR(team->t.t_pkfn,
5260 NULL); // Important for Debugging Support Library.
5261 team->t.t_copyin_counter = 0; // init counter for possible reuse
5262 // Do not reset pointer to parent team to NULL for hot teams.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005263
Jonathan Peyton30419822017-05-12 18:01:32 +00005264 /* if we are non-hot team, release our threads */
5265 if (!use_hot_team) {
5266 if (__kmp_tasking_mode != tskm_immediate_exec) {
5267 // Wait for threads to reach reapable state
5268 for (f = 1; f < team->t.t_nproc; ++f) {
5269 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5270 kmp_info_t *th = team->t.t_threads[f];
5271 volatile kmp_uint32 *state = &th->th.th_reap_state;
5272 while (*state != KMP_SAFE_TO_REAP) {
Andrey Churbanov581490e2017-02-06 18:53:32 +00005273#if KMP_OS_WINDOWS
Jonathan Peyton30419822017-05-12 18:01:32 +00005274 // On Windows a thread can be killed at any time, check this
5275 DWORD ecode;
5276 if (!__kmp_is_thread_alive(th, &ecode)) {
5277 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
5278 break;
5279 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005280#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005281 // first check if thread is sleeping
5282 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5283 if (fl.is_sleeping())
5284 fl.resume(__kmp_gtid_from_thread(th));
5285 KMP_CPU_PAUSE();
5286 }
5287 }
Andrey Churbanov581490e2017-02-06 18:53:32 +00005288
Jonathan Peyton30419822017-05-12 18:01:32 +00005289 // Delete task teams
5290 int tt_idx;
5291 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5292 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5293 if (task_team != NULL) {
5294 for (f = 0; f < team->t.t_nproc;
5295 ++f) { // Have all threads unref task teams
5296 team->t.t_threads[f]->th.th_task_team = NULL;
5297 }
5298 KA_TRACE(
5299 20,
5300 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5301 __kmp_get_gtid(), task_team, team->t.t_id));
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005302#if KMP_NESTED_HOT_TEAMS
Jonathan Peyton30419822017-05-12 18:01:32 +00005303 __kmp_free_task_team(master, task_team);
Jonathan Peytonbaaccfa2015-11-16 22:48:41 +00005304#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00005305 team->t.t_task_team[tt_idx] = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005306 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005307 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005308 }
5309
Jonathan Peyton30419822017-05-12 18:01:32 +00005310 // Reset pointer to parent team only for non-hot teams.
5311 team->t.t_parent = NULL;
5312 team->t.t_level = 0;
5313 team->t.t_active_level = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005314
Jonathan Peyton30419822017-05-12 18:01:32 +00005315 /* free the worker threads */
5316 for (f = 1; f < team->t.t_nproc; ++f) {
5317 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5318 __kmp_free_thread(team->t.t_threads[f]);
5319 team->t.t_threads[f] = NULL;
5320 }
5321
5322 /* put the team back in the team pool */
5323 /* TODO limit size of team pool, call reap_team if pool too large */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005324 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005325 __kmp_team_pool = (volatile kmp_team_t *)team;
5326 }
5327
5328 KMP_MB();
5329}
Jim Cownie5e8470a2013-09-27 10:38:44 +00005330
5331/* reap the team. destroy it, reclaim all its resources and free its memory */
Jonathan Peyton30419822017-05-12 18:01:32 +00005332kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5333 kmp_team_t *next_pool = team->t.t_next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005334
Jonathan Peyton30419822017-05-12 18:01:32 +00005335 KMP_DEBUG_ASSERT(team);
5336 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5337 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5338 KMP_DEBUG_ASSERT(team->t.t_threads);
5339 KMP_DEBUG_ASSERT(team->t.t_argv);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005340
Jonathan Peyton30419822017-05-12 18:01:32 +00005341 /* TODO clean the threads that are a part of this? */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005342
Jonathan Peyton30419822017-05-12 18:01:32 +00005343 /* free stuff */
5344 __kmp_free_team_arrays(team);
5345 if (team->t.t_argv != &team->t.t_inline_argv[0])
5346 __kmp_free((void *)team->t.t_argv);
5347 __kmp_free(team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005348
Jonathan Peyton30419822017-05-12 18:01:32 +00005349 KMP_MB();
5350 return next_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005351}
5352
Jim Cownie5e8470a2013-09-27 10:38:44 +00005353// Free the thread. Don't reap it, just place it on the pool of available
5354// threads.
5355//
5356// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5357// binding for the affinity mechanism to be useful.
5358//
5359// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5360// However, we want to avoid a potential performance problem by always
5361// scanning through the list to find the correct point at which to insert
5362// the thread (potential N**2 behavior). To do this we keep track of the
5363// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5364// With single-level parallelism, threads will always be added to the tail
5365// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5366// parallelism, all bets are off and we may need to scan through the entire
5367// free list.
5368//
5369// This change also has a potentially large performance benefit, for some
5370// applications. Previously, as threads were freed from the hot team, they
5371// would be placed back on the free list in inverse order. If the hot team
5372// grew back to its original size, then the freed thread would be placed
5373// back on the hot team in reverse order. This could cause bad cache
5374// locality problems on programs where the size of the hot team regularly
5375// grew and shrunk.
5376//
5377// Now, for single-level parallelism, the OMP tid is always == gtid.
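// Illustration: with a pool sorted by gtid as 3 -> 5 -> 8 and
// __kmp_thread_pool_insert_pt at gtid 5, freeing gtid 7 scans only from 5
// onward; freeing gtid 4 resets the insert point and rescans from the head.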
Jonathan Peyton30419822017-05-12 18:01:32 +00005378void __kmp_free_thread(kmp_info_t *this_th) {
5379 int gtid;
5380 kmp_info_t **scan;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005381
Jonathan Peyton30419822017-05-12 18:01:32 +00005382 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5383 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005384
Jonathan Peyton30419822017-05-12 18:01:32 +00005385 KMP_DEBUG_ASSERT(this_th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005386
Jonathan Peyton30419822017-05-12 18:01:32 +00005387 // When moving a thread to the pool, switch it to wait on its own b_go
5388 // flag with an uninitialized (NULL) team.
5389 int b;
5390 kmp_balign_t *balign = this_th->th.th_bar;
5391 for (b = 0; b < bs_last_barrier; ++b) {
5392 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5393 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5394 balign[b].bb.team = NULL;
5395 balign[b].bb.leaf_kids = 0;
5396 }
5397 this_th->th.th_task_state = 0;
5398
5399 /* put thread back on the free pool */
5400 TCW_PTR(this_th->th.th_team, NULL);
5401 TCW_PTR(this_th->th.th_root, NULL);
5402 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5403
5404 // If the __kmp_thread_pool_insert_pt is already past the new insert
5405 // point, then we need to re-scan the entire list.
5406 gtid = this_th->th.th_info.ds.ds_gtid;
5407 if (__kmp_thread_pool_insert_pt != NULL) {
5408 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5409 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5410 __kmp_thread_pool_insert_pt = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005411 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005412 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00005413
Jonathan Peyton30419822017-05-12 18:01:32 +00005414 // Scan down the list to find the place to insert the thread.
5415 // scan is the address of a link in the list, possibly the address of
5416 // __kmp_thread_pool itself.
5417 //
5418 // In the absence of nested parallelism, the for loop will have 0 iterations.
5419 if (__kmp_thread_pool_insert_pt != NULL) {
5420 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5421 } else {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005422 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005423 }
5424 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5425 scan = &((*scan)->th.th_next_pool))
5426 ;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005427
Jonathan Peyton30419822017-05-12 18:01:32 +00005428 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
5429 // to its address.
5430 TCW_PTR(this_th->th.th_next_pool, *scan);
5431 __kmp_thread_pool_insert_pt = *scan = this_th;
5432 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5433 (this_th->th.th_info.ds.ds_gtid <
5434 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5435 TCW_4(this_th->th.th_in_pool, TRUE);
5436 __kmp_thread_pool_nth++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005437
Jonathan Peyton30419822017-05-12 18:01:32 +00005438 TCW_4(__kmp_nth, __kmp_nth - 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005439
5440#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005441 /* Adjust blocktime back to user setting or default if necessary */
5442 /* Middle initialization might never have occurred */
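  /* If the user never set a blocktime and the process is no longer
     oversubscribed (__kmp_nth <= __kmp_avail_proc), stop forcing the
     zero blocktime used under oversubscription. */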
5443 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5444 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5445 if (__kmp_nth <= __kmp_avail_proc) {
5446 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005447 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005448 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005449#endif /* KMP_ADJUST_BLOCKTIME */
5450
Jonathan Peyton30419822017-05-12 18:01:32 +00005451 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005452}
5453
Jim Cownie5e8470a2013-09-27 10:38:44 +00005454/* ------------------------------------------------------------------------ */
5455
Jonathan Peyton30419822017-05-12 18:01:32 +00005456void *__kmp_launch_thread(kmp_info_t *this_thr) {
5457 int gtid = this_thr->th.th_info.ds.ds_gtid;
5458 /* void *stack_data;*/
5459 kmp_team_t *(*volatile pteam);
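  // pteam is a (volatile) pointer to this thread's team field; the current
  // team is re-read through TCR_SYNC_PTR(*pteam) each time we are woken.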
Jim Cownie5e8470a2013-09-27 10:38:44 +00005460
Jonathan Peyton30419822017-05-12 18:01:32 +00005461 KMP_MB();
5462 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005463
Jonathan Peyton30419822017-05-12 18:01:32 +00005464 if (__kmp_env_consistency_check) {
5465 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
5466 }
5467
5468#if OMPT_SUPPORT
5469 if (ompt_enabled) {
5470 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5471 this_thr->th.ompt_thread_info.wait_id = 0;
5472 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5473 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5474 __ompt_thread_begin(ompt_thread_worker, gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005475 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005476 }
5477#endif
5478
5479 /* This is the place where threads wait for work */
5480 while (!TCR_4(__kmp_global.g.g_done)) {
5481 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5482 KMP_MB();
5483
5484 /* wait for work to do */
5485 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005486
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005487#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00005488 if (ompt_enabled) {
Jonathan Peyton30419822017-05-12 18:01:32 +00005489 this_thr->th.ompt_thread_info.state = ompt_state_idle;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005490 }
5491#endif
5492
Jonathan Peyton30419822017-05-12 18:01:32 +00005493 /* No tid yet since not part of a team */
5494 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5495
5496#if OMPT_SUPPORT
5497 if (ompt_enabled) {
5498 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5499 }
5500#endif
5501
5502 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5503
5504 /* have we been allocated? */
5505 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5506#if OMPT_SUPPORT
5507 ompt_task_info_t *task_info;
5508 ompt_parallel_id_t my_parallel_id;
5509 if (ompt_enabled) {
5510 task_info = __ompt_get_taskinfo(0);
5511 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
5512 }
5513#endif
5514 /* we were just woken up, so run our new task */
5515 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5516 int rc;
5517 KA_TRACE(20,
5518 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5519 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5520 (*pteam)->t.t_pkfn));
5521
5522 updateHWFPControl(*pteam);
5523
5524#if OMPT_SUPPORT
5525 if (ompt_enabled) {
5526 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5527 // Initialize OMPT task id for implicit task.
5528 int tid = __kmp_tid_from_gtid(gtid);
5529 task_info->task_id = __ompt_task_id_new(tid);
5530 }
5531#endif
5532
5533 {
5534 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5535 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5536 rc = (*pteam)->t.t_invoke(gtid);
5537 }
5538 KMP_ASSERT(rc);
5539
5540#if OMPT_SUPPORT
5541 if (ompt_enabled) {
5542 /* no frame set while outside task */
5543 task_info->frame.exit_runtime_frame = NULL;
5544
5545 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5546 }
5547#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005548 KMP_MB();
Jonathan Peyton30419822017-05-12 18:01:32 +00005549 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5550 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5551 (*pteam)->t.t_pkfn));
5552 }
5553 /* join barrier after parallel region */
5554 __kmp_join_barrier(gtid);
Jonathan Peytonb4c73d82016-01-26 21:45:21 +00005555#if OMPT_SUPPORT && OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00005556 if (ompt_enabled) {
5557 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
5558 // don't access *pteam here: it may have already been freed
5559 // by the master thread behind the barrier (possible race)
5560 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5561 my_parallel_id, task_info->task_id);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005562 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005563 task_info->frame.exit_runtime_frame = NULL;
5564 task_info->task_id = 0;
5565 }
5566#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005567 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005568 }
5569 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005570
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005571#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005572 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5573 __ompt_thread_end(ompt_thread_worker, gtid);
5574 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005575#endif
5576
Jonathan Peyton30419822017-05-12 18:01:32 +00005577 this_thr->th.th_task_team = NULL;
5578 /* run the destructors for the threadprivate data for this thread */
5579 __kmp_common_destroy_gtid(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005580
Jonathan Peyton30419822017-05-12 18:01:32 +00005581 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5582 KMP_MB();
5583 return this_thr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005584}
5585
5586/* ------------------------------------------------------------------------ */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005587
Jonathan Peyton30419822017-05-12 18:01:32 +00005588void __kmp_internal_end_dest(void *specific_gtid) {
5589#if KMP_COMPILER_ICC
5590#pragma warning(push)
5591#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
5592// significant bits
5593#endif
5594 // Make sure no significant bits are lost
5595 int gtid = (kmp_intptr_t)specific_gtid - 1;
5596#if KMP_COMPILER_ICC
5597#pragma warning(pop)
5598#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005599
Jonathan Peyton30419822017-05-12 18:01:32 +00005600 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5601 /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
5602 * this is because 0 is reserved for the nothing-stored case */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005603
Jonathan Peyton30419822017-05-12 18:01:32 +00005604 /* josh: One reason for setting the gtid specific data even when it is being
5605 destroyed by pthread is to allow gtid lookup through thread specific data
5606 (__kmp_gtid_get_specific). Some of the code, especially stat code,
5607 that gets executed in the call to __kmp_internal_end_thread, actually
5608 gets the gtid through the thread specific data. Setting it here seems
5609 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5610 to run smoothly.
5611 todo: get rid of this after we remove the dependence on
5612 __kmp_gtid_get_specific */
5613 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5614 __kmp_gtid_set_specific(gtid);
5615#ifdef KMP_TDATA_GTID
5616 __kmp_gtid = gtid;
5617#endif
5618 __kmp_internal_end_thread(gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005619}
5620
Jonathan Peyton99016992015-05-26 17:32:53 +00005621#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jim Cownie5e8470a2013-09-27 10:38:44 +00005622
Jonathan Peyton30419822017-05-12 18:01:32 +00005623// 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases
5624// destructors work perfectly, but in real libomp.so I have no evidence it is
5625// ever called. However, -fini linker option in makefile.mk works fine.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005626
Jonathan Peyton30419822017-05-12 18:01:32 +00005627__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5628 __kmp_internal_end_atexit();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005629}
5630
Jonathan Peyton30419822017-05-12 18:01:32 +00005631void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005632
5633#endif
5634
Jonathan Peyton30419822017-05-12 18:01:32 +00005635/* [Windows] josh: when the atexit handler is called, there may still be more
5636 than one thread alive */
5637void __kmp_internal_end_atexit(void) {
5638 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5639 /* [Windows]
5640 josh: ideally, we want to completely shut down the library in this atexit
5641 handler, but stat code that depends on thread specific data for gtid fails
5642 because that data becomes unavailable at some point during the shutdown, so
5643 we call __kmp_internal_end_thread instead. We should eventually remove the
5644 dependency on __kmp_get_specific_gtid in the stat code and use
5645 __kmp_internal_end_library to cleanly shut down the library.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005646
Jonathan Peyton30419822017-05-12 18:01:32 +00005647 // TODO: Can some of this comment about GVS be removed?
5648 I suspect that the offending stat code is executed when the calling thread
5649 tries to clean up a dead root thread's data structures, resulting in GVS
5650 code trying to close the GVS structures for that thread, but since the stat
5651 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5652 the calling thread is cleaning up itself instead of another thread, it get
5653 confused. This happens because allowing a thread to unregister and cleanup
5654 another thread is a recent modification for addressing an issue.
5655 Based on the current design (20050722), a thread may end up
5656 trying to unregister another thread only if thread death does not trigger
5657 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5658 thread specific data destructor function to detect thread death. For
5659 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5660 is nothing. Thus, the workaround is applicable only for Windows static
5661 stat library. */
5662 __kmp_internal_end_library(-1);
5663#if KMP_OS_WINDOWS
5664 __kmp_close_console();
5665#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005666}
5667
Jonathan Peyton30419822017-05-12 18:01:32 +00005668static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5669 // It is assumed __kmp_forkjoin_lock is acquired.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005670
Jonathan Peyton30419822017-05-12 18:01:32 +00005671 int gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005672
Jonathan Peyton30419822017-05-12 18:01:32 +00005673 KMP_DEBUG_ASSERT(thread != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005674
Jonathan Peyton30419822017-05-12 18:01:32 +00005675 gtid = thread->th.th_info.ds.ds_gtid;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005676
Jonathan Peyton30419822017-05-12 18:01:32 +00005677 if (!is_root) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00005678
Jonathan Peyton30419822017-05-12 18:01:32 +00005679 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5680 /* Assume the threads are at the fork barrier here */
5681 KA_TRACE(
5682 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5683 gtid));
5684 /* Need release fence here to prevent seg faults for tree forkjoin barrier
5685 * (GEH) */
5686 ANNOTATE_HAPPENS_BEFORE(thread);
5687 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5688 __kmp_release_64(&flag);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005689 }; // if
5690
Jonathan Peyton30419822017-05-12 18:01:32 +00005691 // Terminate OS thread.
5692 __kmp_reap_worker(thread);
Jonathan Peyton7ca7ef02016-11-21 16:18:57 +00005693
Jonathan Peyton30419822017-05-12 18:01:32 +00005694 // The thread was killed asynchronously. If it was actively
5695 // spinning in the thread pool, decrement the global count.
5696 //
5697 // There is a small timing hole here - if the worker thread was just waking
5698 // up after sleeping in the pool, had reset its th_active_in_pool flag but
5699 // not decremented the global counter __kmp_thread_pool_active_nth yet, then
5700 // the global counter might not get updated.
5701 //
5702 // Currently, this can only happen as the library is unloaded,
5703 // so there are no harmful side effects.
5704 if (thread->th.th_active_in_pool) {
5705 thread->th.th_active_in_pool = FALSE;
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00005706 KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
Jonathan Peyton30419822017-05-12 18:01:32 +00005707 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
5708 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005709
Jonathan Peyton30419822017-05-12 18:01:32 +00005710 // Decrement # of [worker] threads in the pool.
5711 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5712 --__kmp_thread_pool_nth;
5713 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00005714
Jonathan Peyton30419822017-05-12 18:01:32 +00005715 __kmp_free_implicit_task(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005716
Jonathan Peyton30419822017-05-12 18:01:32 +00005717// Free the fast memory for tasking
5718#if USE_FAST_MEMORY
5719 __kmp_free_fast_memory(thread);
5720#endif /* USE_FAST_MEMORY */
5721
5722 __kmp_suspend_uninitialize_thread(thread);
5723
5724 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5725 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5726
5727 --__kmp_all_nth;
5728// __kmp_nth was decremented when the thread was added to the pool.
Jim Cownie5e8470a2013-09-27 10:38:44 +00005729
5730#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00005731 /* Adjust blocktime back to user setting or default if necessary */
5732 /* Middle initialization might never have occurred */
5733 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5734 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5735 if (__kmp_nth <= __kmp_avail_proc) {
5736 __kmp_zero_bt = FALSE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005737 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005738 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005739#endif /* KMP_ADJUST_BLOCKTIME */
5740
Jonathan Peyton30419822017-05-12 18:01:32 +00005741 /* free the memory being used */
5742 if (__kmp_env_consistency_check) {
5743 if (thread->th.th_cons) {
5744 __kmp_free_cons_stack(thread->th.th_cons);
5745 thread->th.th_cons = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005746 }; // if
Jonathan Peyton30419822017-05-12 18:01:32 +00005747 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005748
Jonathan Peyton30419822017-05-12 18:01:32 +00005749 if (thread->th.th_pri_common != NULL) {
5750 __kmp_free(thread->th.th_pri_common);
5751 thread->th.th_pri_common = NULL;
5752 }; // if
Andrey Churbanov6d224db2015-02-10 18:37:43 +00005753
Jonathan Peyton30419822017-05-12 18:01:32 +00005754 if (thread->th.th_task_state_memo_stack != NULL) {
5755 __kmp_free(thread->th.th_task_state_memo_stack);
5756 thread->th.th_task_state_memo_stack = NULL;
5757 }
5758
5759#if KMP_USE_BGET
5760 if (thread->th.th_local.bget_data != NULL) {
5761 __kmp_finalize_bget(thread);
5762 }; // if
5763#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005764
Alp Toker98758b02014-03-02 04:12:06 +00005765#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00005766 if (thread->th.th_affin_mask != NULL) {
5767 KMP_CPU_FREE(thread->th.th_affin_mask);
5768 thread->th.th_affin_mask = NULL;
5769 }; // if
Alp Toker98758b02014-03-02 04:12:06 +00005770#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00005771
Jonathan Peyton30419822017-05-12 18:01:32 +00005772 __kmp_reap_team(thread->th.th_serial_team);
5773 thread->th.th_serial_team = NULL;
5774 __kmp_free(thread);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005775
Jonathan Peyton30419822017-05-12 18:01:32 +00005776 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005777
5778} // __kmp_reap_thread
5779
Jonathan Peyton30419822017-05-12 18:01:32 +00005780static void __kmp_internal_end(void) {
5781 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005782
Jonathan Peyton30419822017-05-12 18:01:32 +00005783 /* First, unregister the library */
5784 __kmp_unregister_library();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005785
Jonathan Peyton30419822017-05-12 18:01:32 +00005786#if KMP_OS_WINDOWS
5787 /* In Win static library, we can't tell when a root actually dies, so we
5788 reclaim the data structures for any root threads that have died but not
5789 unregistered themselves, in order to shut down cleanly.
5790 In Win dynamic library we also can't tell when a thread dies. */
5791 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
5792// dead roots
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00005793#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005794
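  // Scan for any root that is still active. If one is found, only the
  // monitor thread (if used) is reaped below; otherwise all pooled worker
  // threads and teams are reaped as well.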
Jonathan Peyton30419822017-05-12 18:01:32 +00005795 for (i = 0; i < __kmp_threads_capacity; i++)
5796 if (__kmp_root[i])
5797 if (__kmp_root[i]->r.r_active)
5798 break;
5799 KMP_MB(); /* Flush all pending memory write invalidates. */
5800 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5801
5802 if (i < __kmp_threads_capacity) {
5803#if KMP_USE_MONITOR
5804 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
5805 KMP_MB(); /* Flush all pending memory write invalidates. */
5806
5807// Need to check that monitor was initialized before reaping it. If we are
5808// called from __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
5809// __kmp_monitor will appear to contain valid data, but it is only valid in the
5810// parent process, not the child.
5811 // New behavior (201008): instead of keying off of the flag
5812 // __kmp_init_parallel, the monitor thread creation is keyed off
5813 // of the new flag __kmp_init_monitor.
5814 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5815 if (TCR_4(__kmp_init_monitor)) {
5816 __kmp_reap_monitor(&__kmp_monitor);
5817 TCW_4(__kmp_init_monitor, 0);
5818 }
5819 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5820 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5821#endif // KMP_USE_MONITOR
5822 } else {
5823/* TODO move this to cleanup code */
5824#ifdef KMP_DEBUG
5825 /* make sure that everything has properly ended */
5826 for (i = 0; i < __kmp_threads_capacity; i++) {
5827 if (__kmp_root[i]) {
5828 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
5829 // there can be uber threads alive here
5830 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
5831 }
5832 }
5833#endif
5834
5835 KMP_MB();
5836
5837 // Reap the worker threads.
5838 // This is valid for now, but be careful if threads are reaped sooner.
5839 while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool.
5840 // Get the next thread from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005841 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005842 __kmp_thread_pool = thread->th.th_next_pool;
5843 // Reap it.
5844 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5845 thread->th.th_next_pool = NULL;
5846 thread->th.th_in_pool = FALSE;
5847 __kmp_reap_thread(thread, 0);
5848 }; // while
5849 __kmp_thread_pool_insert_pt = NULL;
5850
5851 // Reap teams.
5852 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
5853 // Get the next team from the pool.
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00005854 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
Jonathan Peyton30419822017-05-12 18:01:32 +00005855 __kmp_team_pool = team->t.t_next_pool;
5856 // Reap it.
5857 team->t.t_next_pool = NULL;
5858 __kmp_reap_team(team);
5859 }; // while
5860
5861 __kmp_reap_task_teams();
5862
5863 for (i = 0; i < __kmp_threads_capacity; ++i) {
5864 // TBD: Add some checking...
5865 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
5866 }
5867
5868 /* Make sure all threadprivate destructors get run by joining with all
5869 worker threads before resetting this flag */
5870 TCW_SYNC_4(__kmp_init_common, FALSE);
5871
5872 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
5873 KMP_MB();
5874
5875#if KMP_USE_MONITOR
5876 // See note above: One of the possible fixes for CQ138434 / CQ140126
5877 //
5878 // FIXME: push both code fragments down and CSE them?
5879 // push them into __kmp_cleanup() ?
5880 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5881 if (TCR_4(__kmp_init_monitor)) {
5882 __kmp_reap_monitor(&__kmp_monitor);
5883 TCW_4(__kmp_init_monitor, 0);
5884 }
5885 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5886 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
5887#endif
5888 } /* else !__kmp_global.t_active */
5889 TCW_4(__kmp_init_gtid, FALSE);
5890 KMP_MB(); /* Flush all pending memory write invalidates. */
5891
5892 __kmp_cleanup();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005893#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00005894 ompt_fini();
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00005895#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005896}
5897
Jonathan Peyton30419822017-05-12 18:01:32 +00005898void __kmp_internal_end_library(int gtid_req) {
5899 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
5900 /* this shouldn't be a race condition because __kmp_internal_end() is the
5901 only place to clear __kmp_init_serial */
5902 /* we'll check this later too, after we get the lock */
5903 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
5904 // redundant, because the next check will work in any case.
5905 if (__kmp_global.g.g_abort) {
5906 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
5907 /* TODO abort? */
5908 return;
5909 }
5910 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
5911 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
5912 return;
5913 }
5914
5915 KMP_MB(); /* Flush all pending memory write invalidates. */
5916
5917 /* find out who we are and what we should do */
5918 {
5919 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
5920 KA_TRACE(
5921 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
5922 if (gtid == KMP_GTID_SHUTDOWN) {
5923 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
5924 "already shutdown\n"));
5925 return;
5926 } else if (gtid == KMP_GTID_MONITOR) {
5927 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
5928 "registered, or system shutdown\n"));
5929 return;
5930 } else if (gtid == KMP_GTID_DNE) {
5931 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
5932 "shutdown\n"));
5933 /* we don't know who we are, but we may still shut down the library */
5934 } else if (KMP_UBER_GTID(gtid)) {
5935 /* unregister ourselves as an uber thread. gtid is no longer valid */
5936 if (__kmp_root[gtid]->r.r_active) {
5937 __kmp_global.g.g_abort = -1;
5938 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5939 KA_TRACE(10,
5940 ("__kmp_internal_end_library: root still active, abort T#%d\n",
5941 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005942 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00005943 } else {
5944 KA_TRACE(
5945 10,
5946 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
5947 __kmp_unregister_root_current_thread(gtid);
5948 }
5949 } else {
5950/* worker threads may call this function through the atexit handler, if they
5951 * call exit() */
5952/* For now, skip the usual subsequent processing and just dump the debug buffer.
5953 TODO: do a thorough shutdown instead */
5954#ifdef DUMP_DEBUG_ON_EXIT
5955 if (__kmp_debug_buf)
5956 __kmp_dump_debug_buffer();
5957#endif
5958 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00005959 }
Jonathan Peyton30419822017-05-12 18:01:32 +00005960 }
5961 /* synchronize the termination process */
5962 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005963
Jonathan Peyton30419822017-05-12 18:01:32 +00005964 /* have we already finished */
5965 if (__kmp_global.g.g_abort) {
5966 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
5967 /* TODO abort? */
5968 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
5969 return;
5970 }
5971 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
5972 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
5973 return;
5974 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00005975
Jonathan Peyton30419822017-05-12 18:01:32 +00005976 /* We need this lock to enforce mutex between this reading of
5977 __kmp_threads_capacity and the writing by __kmp_register_root.
5978 Alternatively, we can use a counter of roots that is atomically updated by
5979 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
5980 __kmp_internal_end_*. */
5981 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005982
Jonathan Peyton30419822017-05-12 18:01:32 +00005983 /* now we can safely conduct the actual termination */
5984 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00005985
Jonathan Peyton30419822017-05-12 18:01:32 +00005986 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
5987 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00005988
Jonathan Peyton30419822017-05-12 18:01:32 +00005989 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00005990
Jonathan Peyton30419822017-05-12 18:01:32 +00005991#ifdef DUMP_DEBUG_ON_EXIT
5992 if (__kmp_debug_buf)
5993 __kmp_dump_debug_buffer();
5994#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005995
Jonathan Peyton30419822017-05-12 18:01:32 +00005996#if KMP_OS_WINDOWS
5997 __kmp_close_console();
5998#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00005999
Jonathan Peyton30419822017-05-12 18:01:32 +00006000 __kmp_fini_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006001
6002} // __kmp_internal_end_library
6003
Jonathan Peyton30419822017-05-12 18:01:32 +00006004void __kmp_internal_end_thread(int gtid_req) {
6005 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006006
Jonathan Peyton30419822017-05-12 18:01:32 +00006007 /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6008 /* this shouldn't be a race condition because __kmp_internal_end() is the
6009 * only place to clear __kmp_init_serial */
6010 /* we'll check this later too, after we get the lock */
6011 // 2009-09-06: We do not set g_abort without setting g_done. This check looks
6012 // redundant, because the next check will work in any case.
6013 if (__kmp_global.g.g_abort) {
6014 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6015 /* TODO abort? */
6016 return;
6017 }
6018 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6019 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6020 return;
6021 }
6022
6023 KMP_MB(); /* Flush all pending memory write invalidates. */
6024
6025 /* find out who we are and what we should do */
6026 {
6027 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6028 KA_TRACE(10,
6029 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6030 if (gtid == KMP_GTID_SHUTDOWN) {
6031 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6032 "already shutdown\n"));
6033 return;
6034 } else if (gtid == KMP_GTID_MONITOR) {
6035 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6036 "registered, or system shutdown\n"));
6037 return;
6038 } else if (gtid == KMP_GTID_DNE) {
6039 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6040 "shutdown\n"));
6041 return;
6042 /* we don't know who we are */
6043 } else if (KMP_UBER_GTID(gtid)) {
6044 /* unregister ourselves as an uber thread. gtid is no longer valid */
6045 if (__kmp_root[gtid]->r.r_active) {
6046 __kmp_global.g.g_abort = -1;
6047 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6048 KA_TRACE(10,
6049 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
6050 gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006051 return;
Jonathan Peyton30419822017-05-12 18:01:32 +00006052 } else {
6053 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
6054 gtid));
6055 __kmp_unregister_root_current_thread(gtid);
6056 }
6057 } else {
6058 /* just a worker thread, let's leave */
6059 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6060
6061 if (gtid >= 0) {
6062 __kmp_threads[gtid]->th.th_task_team = NULL;
6063 }
6064
6065 KA_TRACE(10,
6066 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6067 gtid));
6068 return;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006069 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006070 }
6071#if defined KMP_DYNAMIC_LIB
6072 // AC: let's not shut down the Linux* OS dynamic library at the exit of an
6073 // uber thread; it is better to shut down later, in the library destructor.
6074 // The reason for this change is a performance problem seen when a non-OpenMP
6075 // thread forks and joins many OpenMP threads in a loop. We can save a lot of
6076 // time by keeping worker threads alive until program shutdown.
6077 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
6078 // and Windows(DPD200287443) that occurs when using critical sections from
6079 // foreign threads.
6080 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6081 return;
6082#endif
6083 /* synchronize the termination process */
6084 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006085
Jonathan Peyton30419822017-05-12 18:01:32 +00006086 /* have we already finished */
6087 if (__kmp_global.g.g_abort) {
6088 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6089 /* TODO abort? */
6090 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6091 return;
6092 }
6093 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6094 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6095 return;
6096 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006097
Jonathan Peyton30419822017-05-12 18:01:32 +00006098 /* We need this lock to enforce mutex between this reading of
6099 __kmp_threads_capacity and the writing by __kmp_register_root.
6100 Alternatively, we can use a counter of roots that is atomically updated by
6101 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
6102 __kmp_internal_end_*. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006103
Jonathan Peyton30419822017-05-12 18:01:32 +00006104 /* should we finish the run-time? are all siblings done? */
6105 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006106
Jonathan Peyton30419822017-05-12 18:01:32 +00006107 for (i = 0; i < __kmp_threads_capacity; ++i) {
6108 if (KMP_UBER_GTID(i)) {
6109 KA_TRACE(
6110 10,
6111 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6112 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6113 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6114 return;
6115 };
6116 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006117
Jonathan Peyton30419822017-05-12 18:01:32 +00006118 /* now we can safely conduct the actual termination */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006119
Jonathan Peyton30419822017-05-12 18:01:32 +00006120 __kmp_internal_end();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006121
Jonathan Peyton30419822017-05-12 18:01:32 +00006122 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6123 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006124
Jonathan Peyton30419822017-05-12 18:01:32 +00006125 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006126
Jonathan Peyton30419822017-05-12 18:01:32 +00006127#ifdef DUMP_DEBUG_ON_EXIT
6128 if (__kmp_debug_buf)
6129 __kmp_dump_debug_buffer();
6130#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006131} // __kmp_internal_end_thread
6132
Jonathan Peyton30419822017-05-12 18:01:32 +00006133// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006134// Library registration stuff.
6135
Jonathan Peyton30419822017-05-12 18:01:32 +00006136static long __kmp_registration_flag = 0;
6137// Random value used to indicate library initialization.
6138static char *__kmp_registration_str = NULL;
6139// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006140
Jonathan Peyton30419822017-05-12 18:01:32 +00006141static inline char *__kmp_reg_status_name() {
6142 /* On RHEL 3u5 if linked statically, getpid() returns different values in
6143 each thread. If registration and unregistration go in different threads
6144 (omp_misc_other_root_exit.cpp test case), the registered_lib_env env
6145 var cannot be found, because its name will contain a different pid. */
6146 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00006147} // __kmp_reg_status_name
6148
Jonathan Peyton30419822017-05-12 18:01:32 +00006149void __kmp_register_library_startup(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006150
Jonathan Peyton30419822017-05-12 18:01:32 +00006151 char *name = __kmp_reg_status_name(); // Name of the environment variable.
6152 int done = 0;
6153 union {
6154 double dtime;
6155 long ltime;
6156 } time;
6157#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6158 __kmp_initialize_system_tick();
6159#endif
6160 __kmp_read_system_time(&time.dtime);
6161 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6162 __kmp_registration_str =
6163 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6164 __kmp_registration_flag, KMP_LIBRARY_FILE);
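  // Illustrative value (the address and file name here are hypothetical): for
  // a process whose pid is 12345 this records something like
  //   __KMP_REGISTERED_LIB_12345=0x7f86542e10c0-cafe1234-libomp.so
  // i.e. the address of __kmp_registration_flag, the flag's pseudo-random
  // value (0xCAFE0000 mixed with the low 16 bits of the system time read
  // above), and the library file name, joined with '-'. The loop below parses
  // this back with __kmp_str_split when another copy of the runtime must be
  // checked for liveness.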
Jim Cownie5e8470a2013-09-27 10:38:44 +00006165
Jonathan Peyton30419822017-05-12 18:01:32 +00006166 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6167 __kmp_registration_str));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006168
Jonathan Peyton30419822017-05-12 18:01:32 +00006169 while (!done) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006170
Jonathan Peyton30419822017-05-12 18:01:32 +00006171 char *value = NULL; // Actual value of the environment variable.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006172
Jonathan Peyton30419822017-05-12 18:01:32 +00006173 // Set the environment variable, but do not overwrite it if it already exists.
6174 __kmp_env_set(name, __kmp_registration_str, 0);
6175 // Check the variable is written.
6176 value = __kmp_env_get(name);
6177 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006178
Jonathan Peyton30419822017-05-12 18:01:32 +00006179 done = 1; // Ok, environment variable set successfully, exit the loop.
Jim Cownie5e8470a2013-09-27 10:38:44 +00006180
Jonathan Peyton30419822017-05-12 18:01:32 +00006181 } else {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006182
Jonathan Peyton30419822017-05-12 18:01:32 +00006183 // Oops. Write failed. Another copy of the OpenMP RTL is in memory.
6184 // Check whether it is alive or dead.
6185 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6186 char *tail = value;
6187 char *flag_addr_str = NULL;
6188 char *flag_val_str = NULL;
6189 char const *file_name = NULL;
6190 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6191 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6192 file_name = tail;
6193 if (tail != NULL) {
6194 long *flag_addr = 0;
6195 long flag_val = 0;
6196 KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
6197 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6198 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6199 // First, check whether environment-encoded address is mapped into
6200 // addr space.
6201 // If so, dereference it to see if it still has the right value.
6202 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6203 neighbor = 1;
6204 } else {
6205 // If not, then we know the other copy of the library is no longer
6206 // running.
6207 neighbor = 2;
6208 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00006209 }; // if
Jonathan Peyton30419822017-05-12 18:01:32 +00006210 }; // if
6211 switch (neighbor) {
6212 case 0: // Cannot parse environment variable -- neighbor status unknown.
6213 // Assume it is the incompatible format of a future version of the
6214 // library. Assume the other library is alive.
6215 // WARN( ... ); // TODO: Issue a warning.
6216 file_name = "unknown library";
6217 // Attention! Falling through to the next case. That's intentional.
6218 case 1: { // Neighbor is alive.
6219 // Check it is allowed.
6220 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6221 if (!__kmp_str_match_true(duplicate_ok)) {
6222 // That's not allowed. Issue fatal error.
6223 __kmp_msg(kmp_ms_fatal,
6224 KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6225 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6226 }; // if
6227 KMP_INTERNAL_FREE(duplicate_ok);
6228 __kmp_duplicate_library_ok = 1;
6229 done = 1; // Exit the loop.
6230 } break;
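    // Usage note: to deliberately run with duplicate runtimes loaded, set the
    // variable to a true-like value accepted by __kmp_str_match_true, e.g.
    // (illustrative):
    //   KMP_DUPLICATE_LIB_OK=TRUE ./app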
6231 case 2: { // Neighbor is dead.
6232 // Clear the variable and try to register library again.
6233 __kmp_env_unset(name);
6234 } break;
6235 default: { KMP_DEBUG_ASSERT(0); } break;
6236 }; // switch
Jim Cownie5e8470a2013-09-27 10:38:44 +00006237
Jonathan Peyton30419822017-05-12 18:01:32 +00006238 }; // if
6239 KMP_INTERNAL_FREE((void *)value);
6240
6241 }; // while
6242 KMP_INTERNAL_FREE((void *)name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006243
6244} // func __kmp_register_library_startup
6245
Jonathan Peyton30419822017-05-12 18:01:32 +00006246void __kmp_unregister_library(void) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006247
Jonathan Peyton30419822017-05-12 18:01:32 +00006248 char *name = __kmp_reg_status_name();
6249 char *value = __kmp_env_get(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006250
Jonathan Peyton30419822017-05-12 18:01:32 +00006251 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6252 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6253 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6254 // Ok, this is our variable. Delete it.
6255 __kmp_env_unset(name);
6256 }; // if
Jim Cownie5e8470a2013-09-27 10:38:44 +00006257
Jonathan Peyton30419822017-05-12 18:01:32 +00006258 KMP_INTERNAL_FREE(__kmp_registration_str);
6259 KMP_INTERNAL_FREE(value);
6260 KMP_INTERNAL_FREE(name);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006261
Jonathan Peyton30419822017-05-12 18:01:32 +00006262 __kmp_registration_flag = 0;
6263 __kmp_registration_str = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006264
6265} // __kmp_unregister_library
6266
Jim Cownie5e8470a2013-09-27 10:38:44 +00006267// End of Library registration stuff.
Jonathan Peyton30419822017-05-12 18:01:32 +00006268// -----------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00006269
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006270#if KMP_MIC_SUPPORTED
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006271
Jonathan Peyton30419822017-05-12 18:01:32 +00006272static void __kmp_check_mic_type() {
6273 kmp_cpuid_t cpuid_state = {0};
6274 kmp_cpuid_t *cs_p = &cpuid_state;
6275 __kmp_x86_cpuid(1, 0, cs_p);
6276 // We don't support mic1 at the moment
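  // CPUID leaf 1 reports the processor signature in EAX: stepping in bits
  // [3:0], model in [7:4], family in [11:8], extended model in [19:16].
  // (eax & 0xff0) == 0xB10 thus matches family 0x0B, model 1 (KNC), and
  // (eax & 0xf0ff0) == 0x50670 matches family 6, model 0x57 (KNL).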
6277 if ((cs_p->eax & 0xff0) == 0xB10) {
6278 __kmp_mic_type = mic2;
6279 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6280 __kmp_mic_type = mic3;
6281 } else {
6282 __kmp_mic_type = non_mic;
6283 }
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006284}
6285
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006286#endif /* KMP_MIC_SUPPORTED */
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006287
Jonathan Peyton30419822017-05-12 18:01:32 +00006288static void __kmp_do_serial_initialize(void) {
6289 int i, gtid;
6290 int size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006291
Jonathan Peyton30419822017-05-12 18:01:32 +00006292 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006293
Jonathan Peyton30419822017-05-12 18:01:32 +00006294 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6295 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6296 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6297 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6298 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006299
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006300#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006301 ompt_pre_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006302#endif
6303
Jonathan Peyton30419822017-05-12 18:01:32 +00006304 __kmp_validate_locks();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006305
Jonathan Peyton30419822017-05-12 18:01:32 +00006306 /* Initialize internal memory allocator */
6307 __kmp_init_allocator();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006308
Jonathan Peyton30419822017-05-12 18:01:32 +00006309 /* Register the library startup via an environment variable and check to see
6310 whether another copy of the library is already registered. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006311
Jonathan Peyton30419822017-05-12 18:01:32 +00006312 __kmp_register_library_startup();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006313
Jonathan Peyton30419822017-05-12 18:01:32 +00006314 /* TODO reinitialization of library */
6315 if (TCR_4(__kmp_global.g.g_done)) {
6316 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6317 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006318
Jonathan Peyton30419822017-05-12 18:01:32 +00006319 __kmp_global.g.g_abort = 0;
6320 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006321
Jonathan Peyton30419822017-05-12 18:01:32 +00006322/* initialize the locks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006323#if KMP_USE_ADAPTIVE_LOCKS
6324#if KMP_DEBUG_ADAPTIVE_LOCKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006325 __kmp_init_speculative_stats();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006326#endif
6327#endif
Jonathan Peytonad579922015-12-17 16:19:05 +00006328#if KMP_STATS_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006329 __kmp_stats_init();
Jonathan Peytonad579922015-12-17 16:19:05 +00006330#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006331 __kmp_init_lock(&__kmp_global_lock);
6332 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6333 __kmp_init_lock(&__kmp_debug_lock);
6334 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6335 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6336 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6337 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6338 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6339 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6340 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6341 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6342 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6343 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6344 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6345 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6346 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6347 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6348 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006349#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006350 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
Jonathan Peytonb66d1aa2016-09-27 17:11:17 +00006351#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006352 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006353
Jonathan Peyton30419822017-05-12 18:01:32 +00006354 /* conduct initialization and initial setup of configuration */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006355
Jonathan Peyton30419822017-05-12 18:01:32 +00006356 __kmp_runtime_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006357
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006358#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006359 __kmp_check_mic_type();
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006360#endif
6361
Jonathan Peyton30419822017-05-12 18:01:32 +00006362// Some global variable initialization moved here from kmp_env_initialize()
Jim Cownie5e8470a2013-09-27 10:38:44 +00006363#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006364 kmp_diag = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006365#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006366 __kmp_abort_delay = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006367
Jonathan Peyton30419822017-05-12 18:01:32 +00006368 // From __kmp_init_dflt_team_nth()
6369 /* assume the entire machine will be used */
6370 __kmp_dflt_team_nth_ub = __kmp_xproc;
6371 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6372 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6373 }
6374 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6375 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6376 }
6377 __kmp_max_nth = __kmp_sys_max_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006378
Jonathan Peyton30419822017-05-12 18:01:32 +00006379 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
6380 // part
6381 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006382#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00006383 __kmp_monitor_wakeups =
6384 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6385 __kmp_bt_intervals =
6386 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
Jonathan Peytone1c7c132016-10-07 18:12:19 +00006387#endif
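// Descriptive note on the block above: with the monitor thread enabled, the
// blocktime (in ms) is converted into a monitor wakeup frequency and a count
// of monitor intervals that a spinning worker waits through before sleeping.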
Jonathan Peyton30419822017-05-12 18:01:32 +00006388 // From "KMP_LIBRARY" part of __kmp_env_initialize()
6389 __kmp_library = library_throughput;
6390 // From KMP_SCHEDULE initialization
6391 __kmp_static = kmp_sch_static_balanced;
6392// AC: do not use analytical here, because it is non-monotonic
6393//__kmp_guided = kmp_sch_guided_iterative_chunked;
6394//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
6395// need to repeat assignment
6396// Barrier initialization. Moved here from the barrier branch-bit control and
6397// barrier method control parts of __kmp_env_initialize().
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006398#if KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton30419822017-05-12 18:01:32 +00006399#define kmp_reduction_barrier_gather_bb ((int)1)
6400#define kmp_reduction_barrier_release_bb ((int)1)
6401#define kmp_reduction_barrier_gather_pat bp_hyper_bar
6402#define kmp_reduction_barrier_release_pat bp_hyper_bar
6403#endif // KMP_FAST_REDUCTION_BARRIER
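// A branch-bits value of b gives each node of the barrier tree roughly 2^b
// children, so the value 1 above selects a binary tree for the hyper-pattern
// reduction barrier.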
6404 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6405 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6406 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6407 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6408 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6409#if KMP_FAST_REDUCTION_BARRIER
6410 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
6411 // lin_64 ): hyper,1
6412 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6413 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6414 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6415 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
Andrey Churbanov613edeb2015-02-20 18:14:43 +00006416 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006417#endif // KMP_FAST_REDUCTION_BARRIER
6418 }
6419#if KMP_FAST_REDUCTION_BARRIER
6420#undef kmp_reduction_barrier_release_pat
6421#undef kmp_reduction_barrier_gather_pat
6422#undef kmp_reduction_barrier_release_bb
6423#undef kmp_reduction_barrier_gather_bb
6424#endif // KMP_FAST_REDUCTION_BARRIER
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006425#if KMP_MIC_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006426 if (__kmp_mic_type == mic2) { // KNC
6427 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
6428 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
6429 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6430 1; // forkjoin release
6431 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6432 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6433 }
6434#if KMP_FAST_REDUCTION_BARRIER
6435 if (__kmp_mic_type == mic2) { // KNC
6436 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6437 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6438 }
Jonathan Peyton492e0a32017-06-13 17:17:26 +00006439#endif // KMP_FAST_REDUCTION_BARRIER
6440#endif // KMP_MIC_SUPPORTED
Jim Cownie5e8470a2013-09-27 10:38:44 +00006441
Jonathan Peyton30419822017-05-12 18:01:32 +00006442// From KMP_CHECKS initialization
Jim Cownie5e8470a2013-09-27 10:38:44 +00006443#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00006444 __kmp_env_checks = TRUE; /* development versions have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006445#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006446 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006447#endif
6448
Jonathan Peyton30419822017-05-12 18:01:32 +00006449 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
6450 __kmp_foreign_tp = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006451
Jonathan Peyton30419822017-05-12 18:01:32 +00006452 __kmp_global.g.g_dynamic = FALSE;
6453 __kmp_global.g.g_dynamic_mode = dynamic_default;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006454
Jonathan Peyton30419822017-05-12 18:01:32 +00006455 __kmp_env_initialize(NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006456
Jonathan Peyton30419822017-05-12 18:01:32 +00006457// Print all messages in message catalog for testing purposes.
6458#ifdef KMP_DEBUG
6459 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6460 if (__kmp_str_match_true(val)) {
6461 kmp_str_buf_t buffer;
6462 __kmp_str_buf_init(&buffer);
6463 __kmp_i18n_dump_catalog(&buffer);
6464 __kmp_printf("%s", buffer.str);
6465 __kmp_str_buf_free(&buffer);
6466 }; // if
6467 __kmp_env_free(&val);
6468#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006469
Jonathan Peyton30419822017-05-12 18:01:32 +00006470 __kmp_threads_capacity =
6471 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6472 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
6473 __kmp_tp_capacity = __kmp_default_tp_capacity(
6474 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006475
Jonathan Peyton30419822017-05-12 18:01:32 +00006476 // If the library is shut down properly, both pools must be NULL. Just in
6477 // case, set them to NULL -- some memory may leak, but subsequent code will
6478 // work even if pools are not freed.
6479 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6480 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6481 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6482 __kmp_thread_pool = NULL;
6483 __kmp_thread_pool_insert_pt = NULL;
6484 __kmp_team_pool = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006485
Jonathan Peyton30419822017-05-12 18:01:32 +00006486 /* Allocate all of the variable sized records */
6487 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
6488 * expandable */
6489 /* Since allocation is cache-aligned, just add extra padding at the end */
6490 size =
6491 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6492 CACHE_LINE;
6493 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6494 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6495 sizeof(kmp_info_t *) * __kmp_threads_capacity);
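  // Layout of the single cache-aligned block allocated above (illustrative):
  //   [ kmp_info_t* x capacity | kmp_root_t* x capacity | CACHE_LINE pad ]
  // __kmp_root is not a separate allocation; it aliases the tail of the
  // __kmp_threads block, so the two arrays share one lifetime.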
Jim Cownie5e8470a2013-09-27 10:38:44 +00006496
Jonathan Peyton30419822017-05-12 18:01:32 +00006497 /* init thread counts */
6498 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6499 0); // Asserts fail if the library is reinitializing and
6500 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
6501 __kmp_all_nth = 0;
6502 __kmp_nth = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006503
Jonathan Peyton30419822017-05-12 18:01:32 +00006504 /* setup the uber master thread and hierarchy */
6505 gtid = __kmp_register_root(TRUE);
6506 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6507 KMP_ASSERT(KMP_UBER_GTID(gtid));
6508 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006509
Jonathan Peyton30419822017-05-12 18:01:32 +00006510 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006511
Jonathan Peyton30419822017-05-12 18:01:32 +00006512 __kmp_common_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006513
Jonathan Peyton30419822017-05-12 18:01:32 +00006514#if KMP_OS_UNIX
6515 /* invoke the child fork handler */
6516 __kmp_register_atfork();
6517#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006518
Jonathan Peyton30419822017-05-12 18:01:32 +00006519#if !defined KMP_DYNAMIC_LIB
6520 {
6521 /* Invoke the exit handler when the program finishes, only for static
6522 library. For dynamic library, we already have _fini and DllMain. */
6523 int rc = atexit(__kmp_internal_end_atexit);
6524 if (rc != 0) {
6525 __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6526 __kmp_msg_null);
6527 }; // if
6528 }
6529#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006530
Jonathan Peyton30419822017-05-12 18:01:32 +00006531#if KMP_HANDLE_SIGNALS
6532#if KMP_OS_UNIX
6533 /* NOTE: make sure that this is called before the user installs their own
6534 signal handlers so that the user handlers are called first. this way they
6535 can return false, not call our handler, avoid terminating the library, and
6536 continue execution where they left off. */
6537 __kmp_install_signals(FALSE);
6538#endif /* KMP_OS_UNIX */
6539#if KMP_OS_WINDOWS
6540 __kmp_install_signals(TRUE);
6541#endif /* KMP_OS_WINDOWS */
6542#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006543
Jonathan Peyton30419822017-05-12 18:01:32 +00006544 /* we have finished the serial initialization */
6545 __kmp_init_counter++;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006546
Jonathan Peyton30419822017-05-12 18:01:32 +00006547 __kmp_init_serial = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006548
Jonathan Peyton30419822017-05-12 18:01:32 +00006549 if (__kmp_settings) {
6550 __kmp_env_print();
6551 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006552
6553#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006554 if (__kmp_display_env || __kmp_display_env_verbose) {
6555 __kmp_env_print_2();
6556 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006557#endif // OMP_40_ENABLED
6558
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006559#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006560 ompt_post_init();
Jonathan Peyton82a13bf2015-09-21 18:01:02 +00006561#endif
6562
Jonathan Peyton30419822017-05-12 18:01:32 +00006563 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006564
Jonathan Peyton30419822017-05-12 18:01:32 +00006565 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006566}
6567
Jonathan Peyton30419822017-05-12 18:01:32 +00006568void __kmp_serial_initialize(void) {
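  // Classic double-checked initialization: __kmp_init_serial is tested once
  // outside __kmp_initz_lock for the fast path, then again under the lock so
  // that exactly one thread runs __kmp_do_serial_initialize().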
6569 if (__kmp_init_serial) {
6570 return;
6571 }
6572 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6573 if (__kmp_init_serial) {
6574 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6575 return;
6576 }
6577 __kmp_do_serial_initialize();
6578 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6579}
6580
6581static void __kmp_do_middle_initialize(void) {
6582 int i, j;
6583 int prev_dflt_team_nth;
6584
6585 if (!__kmp_init_serial) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006586 __kmp_do_serial_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006587 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006588
Jonathan Peyton30419822017-05-12 18:01:32 +00006589 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006590
Jonathan Peyton30419822017-05-12 18:01:32 +00006591 // Save the previous value for the __kmp_dflt_team_nth so that
6592 // we can avoid some reinitialization if it hasn't changed.
6593 prev_dflt_team_nth = __kmp_dflt_team_nth;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006594
Alp Toker98758b02014-03-02 04:12:06 +00006595#if KMP_AFFINITY_SUPPORTED
Jonathan Peyton30419822017-05-12 18:01:32 +00006596 // __kmp_affinity_initialize() will try to set __kmp_ncores to the
6597 // number of cores on the machine.
6598 __kmp_affinity_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006599
Jonathan Peyton30419822017-05-12 18:01:32 +00006600 // Run through the __kmp_threads array and set the affinity mask
6601 // for each root thread that is currently registered with the RTL.
6602 for (i = 0; i < __kmp_threads_capacity; i++) {
6603 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6604 __kmp_affinity_set_init_mask(i, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006605 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006606 }
Alp Toker98758b02014-03-02 04:12:06 +00006607#endif /* KMP_AFFINITY_SUPPORTED */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006608
Jonathan Peyton30419822017-05-12 18:01:32 +00006609 KMP_ASSERT(__kmp_xproc > 0);
6610 if (__kmp_avail_proc == 0) {
6611 __kmp_avail_proc = __kmp_xproc;
6612 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006613
Jonathan Peyton30419822017-05-12 18:01:32 +00006614 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
6615 // correct them now
6616 j = 0;
6617 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6618 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6619 __kmp_avail_proc;
6620 j++;
6621 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006622
Jonathan Peyton30419822017-05-12 18:01:32 +00006623 if (__kmp_dflt_team_nth == 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006624#ifdef KMP_DFLT_NTH_CORES
Jonathan Peyton30419822017-05-12 18:01:32 +00006625 // Default #threads = #cores
6626 __kmp_dflt_team_nth = __kmp_ncores;
6627 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6628 "__kmp_ncores (%d)\n",
6629 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006630#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006631 // Default #threads = #available OS procs
6632 __kmp_dflt_team_nth = __kmp_avail_proc;
6633 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
6634 "__kmp_avail_proc(%d)\n",
6635 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006636#endif /* KMP_DFLT_NTH_CORES */
Jonathan Peyton30419822017-05-12 18:01:32 +00006637 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006638
Jonathan Peyton30419822017-05-12 18:01:32 +00006639 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6640 __kmp_dflt_team_nth = KMP_MIN_NTH;
6641 }
6642 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6643 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6644 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006645
Jonathan Peyton30419822017-05-12 18:01:32 +00006646 // There's no harm in continuing if the following check fails,
6647 // but it indicates an error in the previous logic.
6648 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006649
Jonathan Peyton30419822017-05-12 18:01:32 +00006650 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6651 // Run through the __kmp_threads array and set the num threads icv for each
6652 // root thread that is currently registered with the RTL (which has not
6653 // already explicitly set its nthreads-var with a call to
6654 // omp_set_num_threads()).
6655 for (i = 0; i < __kmp_threads_capacity; i++) {
6656 kmp_info_t *thread = __kmp_threads[i];
6657 if (thread == NULL)
6658 continue;
6659 if (thread->th.th_current_task->td_icvs.nproc != 0)
6660 continue;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006661
Jonathan Peyton30419822017-05-12 18:01:32 +00006662 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006663 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006664 }
6665 KA_TRACE(
6666 20,
6667 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6668 __kmp_dflt_team_nth));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006669
6670#ifdef KMP_ADJUST_BLOCKTIME
Jonathan Peyton30419822017-05-12 18:01:32 +00006671 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
6672 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6673 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6674 if (__kmp_nth > __kmp_avail_proc) {
6675 __kmp_zero_bt = TRUE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006676 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006677 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006678#endif /* KMP_ADJUST_BLOCKTIME */
6679
Jonathan Peyton30419822017-05-12 18:01:32 +00006680 /* we have finished middle initialization */
6681 TCW_SYNC_4(__kmp_init_middle, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006682
Jonathan Peyton30419822017-05-12 18:01:32 +00006683 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006684}
6685
Jonathan Peyton30419822017-05-12 18:01:32 +00006686void __kmp_middle_initialize(void) {
6687 if (__kmp_init_middle) {
6688 return;
6689 }
6690 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6691 if (__kmp_init_middle) {
6692 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6693 return;
6694 }
6695 __kmp_do_middle_initialize();
6696 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6697}
6698
6699void __kmp_parallel_initialize(void) {
6700 int gtid = __kmp_entry_gtid(); // this might be a new root
6701
6702 /* synchronize parallel initialization (for sibling) */
6703 if (TCR_4(__kmp_init_parallel))
6704 return;
6705 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6706 if (TCR_4(__kmp_init_parallel)) {
6707 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6708 return;
6709 }
6710
6711 /* TODO reinitialization after we have already shut down */
6712 if (TCR_4(__kmp_global.g.g_done)) {
6713 KA_TRACE(
6714 10,
6715 ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
6716 __kmp_infinite_loop();
6717 }
6718
6719 /* jc: The lock __kmp_initz_lock is already held, so calling
6720 __kmp_serial_initialize or __kmp_middle_initialize would cause a
6721 deadlock. So we call the __kmp_do_* routines directly. */
6722 if (!__kmp_init_middle) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00006723 __kmp_do_middle_initialize();
Jonathan Peyton30419822017-05-12 18:01:32 +00006724 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006725
Jonathan Peyton30419822017-05-12 18:01:32 +00006726 /* begin initialization */
6727 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
6728 KMP_ASSERT(KMP_UBER_GTID(gtid));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006729
6730#if KMP_ARCH_X86 || KMP_ARCH_X86_64
Jonathan Peyton30419822017-05-12 18:01:32 +00006731 // Save the FP control regs.
6732 // Worker threads will set theirs to these values at thread startup.
6733 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6734 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6735 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006736#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6737
6738#if KMP_OS_UNIX
Jonathan Peyton30419822017-05-12 18:01:32 +00006739#if KMP_HANDLE_SIGNALS
6740 /* must be after __kmp_serial_initialize */
6741 __kmp_install_signals(TRUE);
6742#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00006743#endif
6744
Jonathan Peyton30419822017-05-12 18:01:32 +00006745 __kmp_suspend_initialize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006746
Jonathan Peyton749b4d52016-01-27 21:02:04 +00006747#if defined(USE_LOAD_BALANCE)
Jonathan Peyton30419822017-05-12 18:01:32 +00006748 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6749 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6750 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006751#else
Jonathan Peyton30419822017-05-12 18:01:32 +00006752 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6753 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6754 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006755#endif
6756
Jonathan Peyton30419822017-05-12 18:01:32 +00006757 if (__kmp_version) {
6758 __kmp_print_version_2();
6759 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006760
Jonathan Peyton30419822017-05-12 18:01:32 +00006761 /* we have finished parallel initialization */
6762 TCW_SYNC_4(__kmp_init_parallel, TRUE);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006763
Jonathan Peyton30419822017-05-12 18:01:32 +00006764 KMP_MB();
6765 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00006766
Jonathan Peyton30419822017-05-12 18:01:32 +00006767 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006768}
6769
Jim Cownie5e8470a2013-09-27 10:38:44 +00006770/* ------------------------------------------------------------------------ */
6771
Jonathan Peyton30419822017-05-12 18:01:32 +00006772void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6773 kmp_team_t *team) {
6774 kmp_disp_t *dispatch;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006775
Jonathan Peyton30419822017-05-12 18:01:32 +00006776 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00006777
Jonathan Peyton30419822017-05-12 18:01:32 +00006778 /* none of the threads have encountered any constructs, yet. */
6779 this_thr->th.th_local.this_construct = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006780#if KMP_CACHE_MANAGE
Jonathan Peyton30419822017-05-12 18:01:32 +00006781 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006782#endif /* KMP_CACHE_MANAGE */
Jonathan Peyton30419822017-05-12 18:01:32 +00006783 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6784 KMP_DEBUG_ASSERT(dispatch);
6785 KMP_DEBUG_ASSERT(team->t.t_dispatch);
6786 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
6787 // this_thr->th.th_info.ds.ds_tid ] );
Jim Cownie5e8470a2013-09-27 10:38:44 +00006788
Jonathan Peyton30419822017-05-12 18:01:32 +00006789 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
Jonathan Peytondf6818b2016-06-14 17:57:47 +00006790#if OMP_45_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006791 dispatch->th_doacross_buf_idx =
6792 0; /* reset the doacross dispatch buffer counter */
Jonathan Peyton71909c52016-03-02 22:42:06 +00006793#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006794 if (__kmp_env_consistency_check)
6795 __kmp_push_parallel(gtid, team->t.t_ident);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006796
Jonathan Peyton30419822017-05-12 18:01:32 +00006797 KMP_MB(); /* Flush all pending memory write invalidates. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006798}
6799
Jonathan Peyton30419822017-05-12 18:01:32 +00006800void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
6801 kmp_team_t *team) {
6802 if (__kmp_env_consistency_check)
6803 __kmp_pop_parallel(gtid, team->t.t_ident);
Andrey Churbanovdf0d75e2016-10-27 11:43:07 +00006804
Jonathan Peyton30419822017-05-12 18:01:32 +00006805 __kmp_finish_implicit_task(this_thr);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006806}
6807
Jonathan Peyton30419822017-05-12 18:01:32 +00006808int __kmp_invoke_task_func(int gtid) {
6809 int rc;
6810 int tid = __kmp_tid_from_gtid(gtid);
6811 kmp_info_t *this_thr = __kmp_threads[gtid];
6812 kmp_team_t *team = this_thr->th.th_team;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006813
Jonathan Peyton30419822017-05-12 18:01:32 +00006814 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006815#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006816 if (__itt_stack_caller_create_ptr) {
6817 __kmp_itt_stack_callee_enter(
6818 (__itt_caller)
6819 team->t.t_stack_id); // inform ittnotify about entering user's code
6820 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006821#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006822#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006823 SSC_MARK_INVOKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006824#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006825
6826#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006827 void *dummy;
6828 void **exit_runtime_p;
6829 ompt_task_id_t my_task_id;
6830 ompt_parallel_id_t my_parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006831
Jonathan Peyton30419822017-05-12 18:01:32 +00006832 if (ompt_enabled) {
6833 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]
6834 .ompt_task_info.frame.exit_runtime_frame);
6835 } else {
6836 exit_runtime_p = &dummy;
6837 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006838
6839#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00006840 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6841 my_parallel_id = team->t.ompt_team_info.parallel_id;
6842 if (ompt_enabled &&
6843 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6844 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id,
6845 my_task_id);
6846 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006847#endif
6848#endif
6849
Jonathan Peyton30419822017-05-12 18:01:32 +00006850 {
6851 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6852 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6853 rc =
6854 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6855 tid, (int)team->t.t_argc, (void **)team->t.t_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006856#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006857 ,
6858 exit_runtime_p
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006859#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006860 );
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006861#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006862 *exit_runtime_p = NULL;
Jonas Hahnfeld8a270642016-09-14 13:59:19 +00006863#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006864 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006865
Jim Cownie5e8470a2013-09-27 10:38:44 +00006866#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00006867 if (__itt_stack_caller_create_ptr) {
6868 __kmp_itt_stack_callee_leave(
6869 (__itt_caller)
6870 team->t.t_stack_id); // inform ittnotify about leaving user's code
6871 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00006872#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00006873 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006874
Jonathan Peyton30419822017-05-12 18:01:32 +00006875 return rc;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006876}
6877
6878#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00006879void __kmp_teams_master(int gtid) {
6880 // This routine is called by all master threads in teams construct
6881 kmp_info_t *thr = __kmp_threads[gtid];
6882 kmp_team_t *team = thr->th.th_team;
6883 ident_t *loc = team->t.t_ident;
6884 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6885 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6886 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6887 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6888 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6889// Launch the league of teams now, but do not let workers execute
6890// (they wait on the fork barrier until the next parallel region)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006891#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006892 SSC_MARK_FORKING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006893#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006894 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006895#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006896 (void *)thr->th.th_teams_microtask, // "unwrapped" task
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00006897#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006898 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
6899 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006900#if INCLUDE_SSC_MARKS
Jonathan Peyton30419822017-05-12 18:01:32 +00006901 SSC_MARK_JOINING();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006902#endif
Jonathan Peyton61118492016-05-20 19:03:38 +00006903
Jonathan Peyton30419822017-05-12 18:01:32 +00006904 // AC: last parameter "1" eliminates the join barrier, which won't work because
6905 // worker threads are in a fork barrier waiting for more parallel regions
6906 __kmp_join_call(loc, gtid
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006907#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00006908 ,
6909 fork_context_intel
Jonathan Peytonf89fbbb2015-08-31 18:15:00 +00006910#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00006911 ,
6912 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006913}
6914
Jonathan Peyton30419822017-05-12 18:01:32 +00006915int __kmp_invoke_teams_master(int gtid) {
6916 kmp_info_t *this_thr = __kmp_threads[gtid];
6917 kmp_team_t *team = this_thr->th.th_team;
6918#if KMP_DEBUG
6919 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
6920 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
6921 (void *)__kmp_teams_master);
6922#endif
6923 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
6924 __kmp_teams_master(gtid);
6925 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
6926 return 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006927}
6928#endif /* OMP_40_ENABLED */
6929
6930/* This sets the requested number of threads for the next parallel region
Jonathan Peyton30419822017-05-12 18:01:32 +00006931 encountered by this team. Since this should be enclosed in the forkjoin
6932 critical section, it should avoid race conditions with asymmetrical nested
6933 parallelism. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00006934
Jonathan Peyton30419822017-05-12 18:01:32 +00006935void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
6936 kmp_info_t *thr = __kmp_threads[gtid];
Jim Cownie5e8470a2013-09-27 10:38:44 +00006937
Jonathan Peyton30419822017-05-12 18:01:32 +00006938 if (num_threads > 0)
6939 thr->th.th_set_nproc = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006940}
6941
6942#if OMP_40_ENABLED
6943
6944/* this sets the requested number of teams for the teams region and/or
Jonathan Peyton30419822017-05-12 18:01:32 +00006945 the number of threads for the next parallel region encountered */
6946void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
6947 int num_threads) {
6948 kmp_info_t *thr = __kmp_threads[gtid];
6949 KMP_DEBUG_ASSERT(num_teams >= 0);
6950 KMP_DEBUG_ASSERT(num_threads >= 0);
Jonathan Peyton1be692e2015-11-30 20:14:05 +00006951
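  // Worked example (illustrative): with __kmp_avail_proc == 16, a call with
  // num_teams == 4 and num_threads == 0 gives each team 16 / 4 == 4 threads
  // below; and whenever num_teams * num_threads would exceed __kmp_max_nth,
  // num_threads is clipped to __kmp_max_nth / num_teams.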
Jonathan Peyton30419822017-05-12 18:01:32 +00006952 if (num_teams == 0)
6953 num_teams = 1; // default number of teams is 1.
6954 if (num_teams > __kmp_max_nth) { // if too many teams requested?
6955 if (!__kmp_reserve_warn) {
6956 __kmp_reserve_warn = 1;
6957 __kmp_msg(kmp_ms_warning,
6958 KMP_MSG(CantFormThrTeam, num_teams, __kmp_max_nth),
6959 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Jim Cownie5e8470a2013-09-27 10:38:44 +00006960 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006961 num_teams = __kmp_max_nth;
6962 }
6963 // Set number of teams (number of threads in the outer "parallel" of the
6964 // teams)
6965 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00006966
Jonathan Peyton30419822017-05-12 18:01:32 +00006967 // Remember the number of threads for inner parallel regions
6968 if (num_threads == 0) {
6969 if (!TCR_4(__kmp_init_middle))
6970 __kmp_middle_initialize(); // get __kmp_avail_proc calculated
6971 num_threads = __kmp_avail_proc / num_teams;
6972 if (num_teams * num_threads > __kmp_max_nth) {
6973 // adjust num_threads w/o warning as it is not user setting
6974 num_threads = __kmp_max_nth / num_teams;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006975 }
Jonathan Peyton30419822017-05-12 18:01:32 +00006976 } else {
6977 if (num_teams * num_threads > __kmp_max_nth) {
6978 int new_threads = __kmp_max_nth / num_teams;
6979 if (!__kmp_reserve_warn) { // user asked for too many threads
6980 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6981 __kmp_msg(kmp_ms_warning,
6982 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
6983 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
6984 }
6985 num_threads = new_threads;
6986 }
6987 }
6988 thr->th.th_teams_size.nth = num_threads;
Jim Cownie5e8470a2013-09-27 10:38:44 +00006989}

// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
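
// Illustrative lowering sketch (comment only): a clause such as
// proc_bind(spread) on a parallel directive is expected to arrive here, via
// the __kmpc_push_proc_bind() wrapper, as
//   __kmpc_push_proc_bind(&loc, gtid, proc_bind_spread);
// immediately before the fork that creates the region.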

#endif /* OMP_40_ENABLED */

/* Launch the worker threads into the microtask. */

void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
#endif
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
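
// Simplified call-pairing sketch (comment only; the real sequencing lives in
// __kmp_fork_call/__kmp_join_call): the master brackets its own share of the
// microtask between the fork and join barriers:
//   __kmp_internal_fork(loc, gtid, team); // release workers at fork barrier
//   team->t.t_invoke(gtid);               // master executes the microtask
//   __kmp_internal_join(loc, gtid, team); // wait for workers at join barrier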

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team, if
// we are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count master thread
  }

  // Skip the master thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer parallel level), and the
  // currently executing thread (to become the master) are available to add to
  // the new team, but are currently contributing to the system load, and must
  // be accounted for.
  pool_active = TCR_4(__kmp_thread_pool_active_nth);
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
    // = dynamic_thread_limit, we shouldn't wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the number of active OpenMP threads that are available to add to
  // the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()
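
// Worked example (illustrative numbers only): with __kmp_avail_proc = 8,
// pool_active = 2, and hot_team_active = 3, the master makes
// team_curr_active = 2 + 3 + 1 = 6. If __kmp_get_load_balance() reports
// system_active = 10, then retval = 8 - 10 + 6 = 4, so a request for
// set_nproc = 8 threads is trimmed to 4 to avoid oversubscribing the machine.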

#endif /* USE_LOAD_BALANCE */

/* ------------------------------------------------------------------------ */

/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;

  __kmp_i18n_catclose();

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}

/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is a no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is a no-op.
  return TRUE;
}
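
// Usage sketch (illustrative): with the variables unset, __kmpc_begin() and
// __kmpc_end() stay no-ops. Setting one to a "false" value activates the
// corresponding entry point, e.g. from a shell:
//   KMP_IGNORE_MPPEND=0 ./app   # __kmpc_end() may then shut the runtime down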

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}

/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */

  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
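
// Usage sketch (illustrative; assumes the kmp_set_library* service routines
// forward here, as in kmp_ftn_entry.h): callers pick the barrier-wait policy
// outside of any parallel region, e.g.
//   extern "C" void kmp_set_library_throughput(void);
//   kmp_set_library_throughput(); // idle workers yield/sleep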

void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
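
// Usage sketch (illustrative; assumes the kmp_set_stacksize_s() service
// routine forwards here): request 8 MiB worker stacks before the first
// parallel region; once parallel execution has started, the new value is
// silently ignored.
//   extern "C" void kmp_set_stacksize_s(size_t);
//   kmp_set_stacksize_s(8 * 1024 * 1024);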

/* set the behaviour of the runtime library */
/* TODO: this can cause some odd behaviour with sibling parallelism... */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
    (void)__kmp_change_library(TRUE);
  } break;
  case library_turnaround:
    (void)__kmp_change_library(TRUE);
    break;
  case library_throughput:
    (void)__kmp_change_library(FALSE);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* ------------------------------------------------------------------------ */

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
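
// Usage sketch (illustrative; assumes the kmp_set_blocktime() service routine
// forwards here for the calling thread):
//   extern "C" void kmp_set_blocktime(int msec);
//   kmp_set_blocktime(0); // workers go to sleep immediately at barriers
// A value of 0 favors co-existence with other jobs, while larger values keep
// workers spinning longer for lower fork/join latency.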

void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings
#if OMP_40_ENABLED
      || __kmp_display_env || __kmp_display_env_verbose
#endif // OMP_40_ENABLED
      ) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
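
// Usage sketch (illustrative; assumes the kmp_set_defaults() service routine
// forwards here): apply a setting programmatically, in the same syntax as the
// environment, before the runtime is in use:
//   extern "C" void kmp_set_defaults(char const *);
//   kmp_set_defaults("KMP_BLOCKTIME=0");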

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct ( lck != NULL, like in
  // current PAROPT )
  // If ( reduce_data != NULL && reduce_func != NULL ): the tree-reduction
  // method can be selected by RTL
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by RTL
  // Finally, it's up to the OpenMP RTL to make a decision on which method to
  // select among those generated by PAROPT.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic dereference) is
  // slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||       \
    KMP_OS_DARWIN

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
       // KMP_OS_DARWIN

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
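
// Consumption sketch (simplified, comment only; the real dispatch lives in
// the __kmpc_reduce* entry points): the packed value selects the
// synchronization scheme for the compiler-generated reduction epilogue:
//   PACKED_REDUCTION_METHOD_T m = __kmp_determine_reduction_method(
//       loc, gtid, num_vars, reduce_size, reduce_data, reduce_func, lck);
//   if (m == critical_reduce_block) { /* combine under the named lock */ }
//   else if (m == atomic_reduce_block) { /* compiler emits atomic updates */ }
//   else { /* tree reduction: combine partial results across a barrier */ }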

// This function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}